diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000000..e60de835e6 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,142 @@ +# This workflows will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +name: Build docs + +on: + release: + types: [published] + push: + branches: + - master + - develop + pull_request: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Fetch tags + run: git fetch --prune --unshallow + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build twine + - name: Install package + run: pip install .[doc] + - name: Revert version to most recent version tag + run: git checkout $(git tag -l | grep 'v.*' | tail -n 1 | awk -F post '{print $1}') + - name: Build docs + run: | + cd docs + make html + cd .. + - uses: actions/upload-artifact@v4 + with: + name: docs + path: docs/_build/html + + build-new: + runs-on: ubuntu-latest + # Set up the environment so that it finds conda + defaults: + run: + shell: bash -l {0} + steps: + - name: Install Pandoc for NBSphinx + run: | + sudo apt-get update + sudo apt-get install -y pandoc + - name: Install Dependencies for virtual notifications in Adv.-Exec Tutorial + run: | + sudo apt update + sudo apt install -y xvfb libnotify-bin dbus-x11 xfce4-notifyd + - name: Start Virtual Display (for notifications) + run: | + Xvfb :99 & + export DISPLAY=:99 + eval "$(dbus-launch --sh-syntax)" + echo "DISPLAY=:99" >> $GITHUB_ENV + echo "DBUS_SESSION_BUS_ADDRESS=$DBUS_SESSION_BUS_ADDRESS" >> $GITHUB_ENV + - name: Start Notification Daemon (for notifications) + run: | + xfce4-notifyd & + sleep 2 # Give it some time to start + - name: Send Notification (test notifications) + run: | + notify-send "GitHub Runner Notification" "This is a test notification from GitHub Actions" + - name: Debug Running Processes (for notifications) + run: | + ps aux | grep notify + ps aux | grep xfce4-notifyd + dbus-monitor --session & + sleep 3 + - uses: actions/checkout@v4 + - name: Fetch tags + run: git fetch --prune --unshallow + - name: Install Minconda + uses: conda-incubator/setup-miniconda@v3 + with: + auto-activate-base: true + activate-environment: "" + - name: Install MRtrix via Conda + run: | + conda install -c mrtrix3 mrtrix3 + mrconvert --version + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build twine + - name: Install package + run: pip install .[doc] + - name: Install Python3 kernel + run: python -m ipykernel install --user + - name: Build new docs + run: | + cd new-docs + make html + cd .. 
+ - uses: actions/upload-artifact@v4 + with: + name: new-docs + path: new-docs/build/html + + deploy: + needs: [build, build-new] + runs-on: ubuntu-latest + steps: + - name: Download docs + uses: actions/download-artifact@v4 + with: + name: docs + path: docs-build + - name: Download new docs + uses: actions/download-artifact@v4 + with: + name: new-docs + path: docs-build/new + - name: Check for GHPAGES_DEPLOY_KEY token + id: deployable + # if: github.event_name == 'release' + env: + GHPAGES_DEPLOY_KEY: "${{ secrets.GHPAGES_DEPLOY_KEY }}" + run: if [ -n "$GHPAGES_DEPLOY_KEY" ]; then echo "DEPLOY=true" >> $GITHUB_OUTPUT; fi + - name: Deploy Docs to GitHub Pages + if: steps.deployable.outputs.DEPLOY + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GHPAGES_DEPLOY_KEY }} + publish_dir: docs-build diff --git a/.github/workflows/testdask.yml b/.github/workflows/testdask.yml index 7ca8a29f51..e14ba1f405 100644 --- a/.github/workflows/testdask.yml +++ b/.github/workflows/testdask.yml @@ -4,6 +4,7 @@ on: push: branches: - master + - develop pull_request: concurrency: @@ -18,7 +19,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest] - python-version: ['3.9', '3.10', '3.11', '3.12'] + python-version: ['3.11', '3.12', '3.13'] fail-fast: false runs-on: ${{ matrix.os }} @@ -27,7 +28,8 @@ jobs: uses: actions/checkout@v4 with: repository: ${{ github.repository }} - + - name: Fetch tags + run: git fetch --prune --unshallow - name: Setup Python version ${{ matrix.python-version }} uses: actions/setup-python@v5 with: diff --git a/.github/workflows/testpsijlocal.yml b/.github/workflows/testpsijlocal.yml index 2e1a752ed2..3ad359c505 100644 --- a/.github/workflows/testpsijlocal.yml +++ b/.github/workflows/testpsijlocal.yml @@ -4,6 +4,7 @@ on: push: branches: - master + - develop pull_request: concurrency: @@ -27,7 +28,8 @@ jobs: uses: actions/checkout@v4 with: repository: ${{ github.repository }} - + - name: Fetch tags + run: git fetch --prune --unshallow - name: Setup Python version ${{ matrix.python-version }} uses: actions/setup-python@v5 with: diff --git a/.github/workflows/testpsijslurm.yml b/.github/workflows/testpsijslurm.yml index 9dc9100800..b0fe551ba3 100644 --- a/.github/workflows/testpsijslurm.yml +++ b/.github/workflows/testpsijslurm.yml @@ -4,6 +4,7 @@ on: push: branches: - master + - develop pull_request: concurrency: @@ -24,6 +25,8 @@ jobs: - name: Disable etelemetry run: echo "NO_ET=TRUE" >> $GITHUB_ENV - uses: actions/checkout@v4 + - name: Fetch tags + run: git fetch --prune --unshallow - name: Pull docker image run: | docker pull $DOCKER_IMAGE @@ -47,7 +50,7 @@ jobs: docker exec slurm bash -c "CONFIGURE_OPTS=\"-with-openssl=/opt/openssl\" pyenv install -v 3.11.5" fi docker exec slurm bash -c "pyenv global ${{ matrix.python-version }}" - docker exec slurm bash -c "pip install --upgrade pip && pip install -e /pydra[test,psij] && python -c 'import pydra; print(pydra.__version__)'" + docker exec slurm bash -c "pip install --upgrade pip && pip install -e /pydra[test,psij] && python -c 'import pydra.engine; print(pydra.engine.__version__)'" - name: Run pytest run: | docker exec slurm bash -c "pytest --color=yes -vs -n auto --psij=slurm --cov pydra --cov-config /pydra/.coveragerc --cov-report xml:/pydra/cov.xml --doctest-modules /pydra/pydra/ -k 'not test_audit_prov and not test_audit_prov_messdir_1 and not test_audit_prov_messdir_2 and not test_audit_prov_wf and not test_audit_all'" diff --git a/.github/workflows/testpydra.yml b/.github/workflows/testpydra.yml 
index 3ead2e3a6b..9865b73137 100644 --- a/.github/workflows/testpydra.yml +++ b/.github/workflows/testpydra.yml @@ -4,6 +4,7 @@ on: push: branches: - master + - develop pull_request: defaults: @@ -50,75 +51,36 @@ jobs: strategy: matrix: os: [macos-latest, ubuntu-latest, windows-latest] - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] - install: ['wheel'] - include: - - os: 'ubuntu-latest' - python-version: '3.11' - install: 'sdist' - - os: 'ubuntu-latest' - python-version: '3.11' - install: 'repo' - - os: 'ubuntu-latest' - python-version: '3.11' - install: 'archive' + python-version: ['3.11', '3.12', '3.13'] fail-fast: false runs-on: ${{ matrix.os }} steps: - - name: Fetch sdist/wheel - uses: actions/download-artifact@v4 - if: matrix.install == 'sdist' || matrix.install == 'wheel' - with: - name: dist - path: dist/ - - name: Fetch git archive - uses: actions/download-artifact@v4 - if: matrix.install == 'archive' - with: - name: archive - path: archive/ - name: Fetch repository uses: actions/checkout@v4 - if: matrix.install == 'repo' - + - name: Fetch tags + run: git fetch --prune --unshallow - name: Set up Python ${{ matrix.python-version }} on ${{ matrix.os }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + - name: Update pip run: python -m pip install --upgrade pip - - name: Determine installation target - run: | - if [[ "$INSTALL" = "sdist" ]]; then - echo "ARCHIVE=$( ls dist/*.tar.gz )" >> $GITHUB_ENV - elif [[ "$INSTALL" = "wheel" ]]; then - echo "ARCHIVE=$( ls dist/*.whl )" >> $GITHUB_ENV - elif [[ "$INSTALL" = "archive" ]]; then - echo "ARCHIVE=$( ls archive/*.zip )" >> $GITHUB_ENV - elif [[ "$INSTALL" = "repo" ]]; then - echo "ARCHIVE=." >> $GITHUB_ENV - fi - env: - INSTALL: ${{ matrix.install }} - - name: Install Pydra - run: pip install $ARCHIVE + run: pip install .[test] - name: Print version - run: python -c "import pydra; print(pydra.__version__)" - - - name: Install Pydra tests dependencies - run: pip install pydra[test] + run: python -c "import pydra.engine; print(pydra.engine.__version__)" - name: Disable etelemetry run: echo "NO_ET=TRUE" >> $GITHUB_ENV - name: Pytest run: | - pytest -vs -n auto --doctest-modules --pyargs pydra \ + pytest -vs -n auto --doctest-modules \ --cov pydra --cov-config .coveragerc --cov-report xml:cov.xml - name: Upload to codecov diff --git a/.github/workflows/testsingularity.yml b/.github/workflows/testsingularity.yml index 6cb597cdf8..c989334176 100644 --- a/.github/workflows/testsingularity.yml +++ b/.github/workflows/testsingularity.yml @@ -4,6 +4,7 @@ on: push: branches: - master + - develop pull_request: concurrency: @@ -13,10 +14,10 @@ concurrency: jobs: build: name: Build - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 strategy: matrix: - python-version: [3.8, 3.9, "3.10", "3.11"] + python-version: ['3.11', '3.12', '3.13'] fail-fast: False steps: @@ -64,11 +65,13 @@ jobs: uses: actions/checkout@v4 with: repository: ${{ github.repository }} + - name: Fetch tags + run: git fetch --prune --unshallow - name: Install pydra (test) run: pip install -e ".[test]" - name: Pytest - run: pytest -vs --cov pydra --cov-config .coveragerc --cov-report xml:cov.xml pydra/engine/tests/test_singularity.py + run: pytest -vs --cov pydra --cov-config .coveragerc --cov-report xml:cov.xml pydra/engine/tests/test_singularity.py pydra/engine/tests/test_environments.py - name: Upload to codecov run: codecov -f cov.xml -F unittests -e GITHUB_WORKFLOW diff --git a/.github/workflows/testslurm.yml 
b/.github/workflows/testslurm.yml index 0e1d17f09b..3e715a127b 100644 --- a/.github/workflows/testslurm.yml +++ b/.github/workflows/testslurm.yml @@ -4,6 +4,7 @@ on: push: branches: - master + - develop pull_request: concurrency: @@ -14,7 +15,7 @@ jobs: build: strategy: matrix: - python-version: [3.8.16, 3.9.16, 3.10.9, 3.11.5] + python-version: [3.11.5] fail-fast: false runs-on: ubuntu-latest env: @@ -24,6 +25,8 @@ jobs: - name: Disable etelemetry run: echo "NO_ET=TRUE" >> $GITHUB_ENV - uses: actions/checkout@v4 + - name: Fetch tags + run: git fetch --prune --unshallow - name: Pull docker image run: | docker pull $DOCKER_IMAGE @@ -47,7 +50,7 @@ jobs: docker exec slurm bash -c "CONFIGURE_OPTS=\"-with-openssl=/opt/openssl\" pyenv install -v 3.11.5" fi docker exec slurm bash -c "pyenv global ${{ matrix.python-version }}" - docker exec slurm bash -c "pip install --upgrade pip && pip install -e /pydra[test] && python -c 'import pydra; print(pydra.__version__)'" + docker exec slurm bash -c "pip install --upgrade pip && pip install -e /pydra[test] && python -c 'import pydra.engine; print(pydra.engine.__version__)'" - name: Run pytest run: | docker exec slurm bash -c "pytest --color=yes -vs --cov pydra --cov-config /pydra/.coveragerc --cov-report xml:/pydra/cov.xml --doctest-modules /pydra/pydra/ -k 'not test_audit_prov and not test_audit_prov_messdir_1 and not test_audit_prov_messdir_2 and not test_audit_prov_wf and not test_audit_all'" diff --git a/.gitignore b/.gitignore index da16b937b9..4fba2fa8de 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ dist __pycache__ *.pyc +.python-version .ipynb_checkpoints .vscode/ @@ -18,6 +19,8 @@ cov.xml *.venv .DS_Store +.ipynb_checkpoints # This can be generated in-tree. We never want to commit it. pydra/_version.py +pydra/engine/_version.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f36105398e..2ea004790e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,24 +1,29 @@ # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 - hooks: - - id: trailing-whitespace - - id: end-of-file-fixer - - id: check-yaml - - id: check-added-large-files -- repo: https://github.com/psf/black - rev: 24.4.2 - hooks: - - id: black -- repo: https://github.com/codespell-project/codespell - rev: v2.3.0 - hooks: - - id: codespell - additional_dependencies: - - tomli -- repo: https://github.com/PyCQA/flake8 - rev: 7.0.0 - hooks: - - id: flake8 + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + - repo: https://github.com/psf/black + rev: 24.4.2 + hooks: + - id: black + - repo: https://github.com/codespell-project/codespell + rev: v2.3.0 + hooks: + - id: codespell + additional_dependencies: + - tomli + - repo: https://github.com/PyCQA/flake8 + rev: 7.0.0 + hooks: + - id: flake8 + - repo: https://github.com/kynan/nbstripout + rev: 0.5.0 + hooks: + - id: nbstripout + files: \.(ipynb)$ diff --git a/benchmark.py b/benchmark.py new file mode 100644 index 0000000000..35f6de914d --- /dev/null +++ b/benchmark.py @@ -0,0 +1,38 @@ +import asyncio +import time + + +def sync_function(x): + return x * 2 + + +async def async_function(x): + return x * 2 + + +def benchmark_sync(): + start_time = time.time() + for _ in range(1000000): + sync_function(10) + end_time = time.time() + return 
end_time - start_time + + +async def benchmark_async(): + start_time = time.time() + for _ in range(1000000): + await async_function(10) + end_time = time.time() + return end_time - start_time + + +def main(): + sync_time = benchmark_sync() + print(f"Sync function time: {sync_time:.6f} seconds") + + async_time = asyncio.run(benchmark_async()) + print(f"Async function time: {async_time:.6f} seconds") + + +if __name__ == "__main__": + main() diff --git a/docs/changes.rst b/docs/changes.rst index 4e23840e90..cec100a607 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -6,7 +6,7 @@ Release Notes * refactoring template formatting for ``input_spec`` * fixing issues with input fields with extension (and using them in templates) -* adding simple validators to input spec (using ``attr.validator``) +* adding simple validators to input definition (using ``attr.validator``) * adding ``create_dotfile`` for workflows, that creates graphs as dotfiles (can convert to other formats if dot available) * adding a simple user guide with ``input_spec`` description * expanding docstrings for ``State``, ``audit`` and ``messenger`` @@ -108,7 +108,7 @@ Release Notes --- * big changes in ``ShellTask``, ``DockerTask`` and ``SingularityTask`` - * customized input specification and output specification for ``Task``\s + * customized input definition and output definition for ``Task``\s * adding singularity checks to Travis CI * binding all input files to the container * changes in ``Workflow`` diff --git a/docs/components.rst b/docs/components.rst index d4928e82c6..d35727f2a0 100644 --- a/docs/components.rst +++ b/docs/components.rst @@ -66,7 +66,7 @@ Shell Command Tasks The *Task* can accommodate more complex shell commands by allowing the user to customize inputs and outputs of the commands. One can generate an input - specification to specify names of inputs, positions in the command, types of + definition to specify names of inputs, positions in the command, types of the inputs, and other metadata. As a specific example, FSL's BET command (Brain Extraction Tool) can be called on the command line as: @@ -76,7 +76,7 @@ Shell Command Tasks bet input_file output_file -m Each of the command argument can be treated as a named input to the - ``ShellCommandTask``, and can be included in the input specification. + ``ShellCommandTask``, and can be included in the input definition. 
As shown next, even an output is specified by constructing the *out_file* field form a template: @@ -86,18 +86,18 @@ Shell Command Tasks name="Input", fields=[ ( "in_file", File, - { "help_string": "input file ...", + { "help": "input file ...", "position": 1, "mandatory": True } ), ( "out_file", str, - { "help_string": "name of output ...", + { "help": "name of output ...", "position": 2, "output_file_template": "{in_file}_br" } ), ( "mask", bool, - { "help_string": "create binary mask", + { "help": "create binary mask", "argstr": "-m", } ) ], - bases=(ShellSpec,) ) + bases=(ShellDef,) ) ShellCommandTask(executable="bet", input_spec=bet_input_spec) diff --git a/docs/conf.py b/docs/conf.py index fd0b69ca43..22034515bc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,8 +16,8 @@ sys.path.insert(0, str(Path(__file__).parent.parent.absolute())) sys.path.insert(1, str(Path(__file__).parent / "sphinxext")) -from pydra import __version__ -from github_link import make_linkcode_resolve +from pydra.engine import __version__ # noqa: E402 +from github_link import make_linkcode_resolve # noqa: E402 # -- Project information ----------------------------------------------------- diff --git a/docs/input_spec.rst b/docs/input_spec.rst index 48d66fd814..4e1148c306 100644 --- a/docs/input_spec.rst +++ b/docs/input_spec.rst @@ -5,7 +5,7 @@ Input Specification As it was mentioned in :ref:`shell_command_task`, the user can customize the input and output for the `ShellCommandTask`. -In this section, more examples of the input specification will be provided. +In this section, more examples of the input definition will be provided. Let's start from the previous example: @@ -16,40 +16,40 @@ Let's start from the previous example: name="Input", fields=[ ( "in_file", File, - { "help_string": "input file ...", + { "help": "input file ...", "position": 1, "mandatory": True } ), ( "out_file", str, - { "help_string": "name of output ...", + { "help": "name of output ...", "position": 2, "output_file_template": "{in_file}_br" } ), ( "mask", bool, - { "help_string": "create binary mask", + { "help": "create binary mask", "argstr": "-m", } ) ], - bases=(ShellSpec,) ) + bases=(ShellDef,) ) ShellCommandTask(executable="bet", input_spec=bet_input_spec) -In order to create an input specification, a new `SpecInfo` object has to be created. -The field `name` specifies the type of the spec and it should be always "Input" for -the input specification. -The field `bases` specifies the "base specification" you want to use (can think about it as a -`parent class`) and it will usually contains `ShellSpec` only, unless you want to build on top of -your other specification (this will not be cover in this section). +In order to create an input definition, a new `SpecInfo` object has to be created. +The field `name` specifies the type of the definition and it should be always "Input" for +the input definition. +The field `bases` specifies the "base definition" you want to use (can think about it as a +`parent class`) and it will usually contains `ShellDef` only, unless you want to build on top of +your other definition (this will not be cover in this section). The part that should be always customised is the `fields` part. -Each element of the `fields` is a separate input field that is added to the specification. +Each element of the `fields` is a separate input field that is added to the definition. In this example, three-elements tuples - with name, type and dictionary with additional information - are used. 
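To make the three-element tuple syntax concrete, the sketch below assembles a complete, minimal input definition that wraps the Unix ``cp`` command. It is an editorial illustration only, not part of the patch: the import locations of ``SpecInfo``, ``ShellDef`` (named ``ShellSpec`` before this rename) and ``ShellCommandTask`` are assumptions based on earlier ``pydra.engine`` releases and may differ in the version these docs target.

.. code-block:: python

    # Illustrative sketch only -- the import paths below are assumptions based on
    # earlier pydra releases (where ShellDef was called ShellSpec) and may need
    # adjusting for the version documented here.
    from pathlib import Path

    from pydra.engine import ShellCommandTask
    from pydra.engine.specs import File, ShellDef, SpecInfo

    cp_input_spec = SpecInfo(
        name="Input",  # always "Input" for an input definition
        fields=[
            (
                "in_file",                     # field name
                File,                          # field type
                {"help": "file to copy", "position": 1, "mandatory": True},
            ),
            (
                "out_file",
                str,
                {
                    "help": "destination of the copy",
                    "position": 2,
                    "output_file_template": "{in_file}_copy",
                },
            ),
        ],
        bases=(ShellDef,),  # the "base definition" (think of it as a parent class)
    )

    task = ShellCommandTask(name="cp", executable="cp", input_spec=cp_input_spec)

    Path("my_data.txt").touch()          # hypothetical input file for demonstration
    task.inputs.in_file = "my_data.txt"
    print(task.cmdline)                  # roughly: cp my_data.txt my_data_copy.txt

Because ``out_file`` carries an ``output_file_template``, it is treated as an output field as well and is added to the output definition automatically, so no separate ``output_spec`` is needed for this simple case.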
But this is only one of the supported syntax, more options will be described below. -Adding a New Field to the Spec +Adding a New Field to the Def ------------------------------ -Pydra uses `attr` classes to represent the input specification, and the full syntax for each field +Pydra uses `attr` classes to represent the input definition, and the full syntax for each field is: .. code-block:: python @@ -109,10 +109,10 @@ There are also special types provided by Pydra: Metadata -------- -In the example we used multiple keys in the metadata dictionary including `help_string`, +In the example we used multiple keys in the metadata dictionary including `help`, `position`, etc. In this section all allowed key will be described: -`help_string` (`str`, mandatory): +`help` (`str`, mandatory): A short description of the input field. `mandatory` (`bool`, default: `False`): @@ -150,17 +150,17 @@ In the example we used multiple keys in the metadata dictionary including `help_ If `True` a path will be consider as a path inside the container (and not as a local path). `output_file_template` (`str`): - If provided, the field is treated also as an output field and it is added to the output spec. + If provided, the field is treated also as an output field and it is added to the output definition. The template can use other fields, e.g. `{file1}`. - Used in order to create an output specification. + Used in order to create an output definition. `output_field_name` (`str`, used together with `output_file_template`) - If provided the field is added to the output spec with changed name. - Used in order to create an output specification. + If provided the field is added to the output definition with changed name. + Used in order to create an output definition. `keep_extension` (`bool`, default: `True`): A flag that specifies if the file extension should be removed from the field value. - Used in order to create an output specification. + Used in order to create an output definition. `readonly` (`bool`, default: `False`): If `True` the input field can't be provided by the user but it aggregates other input fields @@ -174,8 +174,9 @@ In the example we used multiple keys in the metadata dictionary including `help_ (a specific input field will be sent). -Validators ----------- -Pydra allows for using simple validator for types and `allowev_values`. -The validators are disabled by default, but can be enabled by calling -`pydra.set_input_validator(flag=True)`. +`shell_arg` Function +-------------------- + +For convenience, there is a function in `pydra.mark` called `shell_arg()`, which will +takes the above metadata values as arguments and inserts them into the metadata passed +to `attrs.field`. This can be especially useful when using an IDE with code-completion. diff --git a/docs/output_spec.rst b/docs/output_spec.rst index 2e0907076b..7ade54e2c4 100644 --- a/docs/output_spec.rst +++ b/docs/output_spec.rst @@ -5,7 +5,7 @@ Output Specification As it was mentioned in :ref:`shell_command_task`, the user can customize the input and output for the `ShellCommandTask`. -In this section, the output specification will be covered. +In this section, the output definition will be covered. 
Instead of using field with `output_file_template` in the customized `input_spec` to specify an output field, @@ -23,13 +23,13 @@ a customized `output_spec` can be used, e.g.: type=File, metadata={ "output_file_template": "{inp1}", - "help_string": "output file", + "help": "output file", "requires": ["inp1", "inp2"] }, ), ) ], - bases=(ShellOutSpec,), + bases=(ShellOutDef,), ) ShellCommandTask(executable=executable, @@ -37,18 +37,18 @@ a customized `output_spec` can be used, e.g.: -Similarly as for `input_spec`, in order to create an output specification, +Similarly as for `input_spec`, in order to create an output definition, a new `SpecInfo` object has to be created. -The field `name` specifies the type of the spec and it should be always "Output" for -the output specification. -The field `bases` specifies the "base specification" you want to use (can think about it as a -`parent class`) and it will usually contains `ShellOutSpec` only, unless you want to build on top of -your other specification (this will not be cover in this section). +The field `name` specifies the type of the definition and it should be always "Output" for +the output definition. +The field `bases` specifies the "base definition" you want to use (can think about it as a +`parent class`) and it will usually contains `ShellOutDef` only, unless you want to build on top of +your other definition (this will not be cover in this section). The part that should be always customised is the `fields` part. -Each element of the `fields` is a separate output field that is added to the specification. +Each element of the `fields` is a separate output field that is added to the definition. In this example, a three-elements tuple - with name, type and dictionary with additional information - is used. -See :ref:`Input Specification section` for other recognized syntax for specification's fields +See :ref:`Input Specification section` for other recognized syntax for definition's fields and possible types. @@ -58,7 +58,7 @@ Metadata The metadata dictionary for `output_spec` can include: -`help_string` (`str`, mandatory): +`help` (`str`, mandatory): A short description of the input field. The same as in `input_spec`. `mandatory` (`bool`, default: `False`): @@ -69,7 +69,7 @@ The metadata dictionary for `output_spec` can include: The template can use other fields, e.g. `{file1}`. The same as in `input_spec`. `output_field_name` (`str`, used together with `output_file_template`) - If provided the field is added to the output spec with changed name. + If provided the field is added to the output definition with changed name. The same as in `input_spec`. 
`keep_extension` (`bool`, default: `True`): diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000000..f2712b1ae6 --- /dev/null +++ b/environment.yml @@ -0,0 +1,25 @@ +name: pydra-tutorial +channels: + - conda-forge + - defaults +dependencies: + - datalad + - pip + - pip: + - pydra==0.23.0a0 + - jupyter + - jupyter_contrib_nbextensions + - jupytext + - jupyterlab + - matplotlib + - nbformat + - nbval + - nest_asyncio + - psutil + - sh + - pytest + - numpy + - pandas + - scipy + - nibabel + - nilearn diff --git a/example.py b/example.py new file mode 100644 index 0000000000..dd9dc87a5f --- /dev/null +++ b/example.py @@ -0,0 +1,34 @@ +import asyncio + + +def is_coroutine_function(func): + return asyncio.iscoroutinefunction(func) + + +async def call_function(func, *args, **kwargs): + if is_coroutine_function(func): + return await func(*args, **kwargs) + else: + return func(*args, **kwargs) + + +# Example usage +async def async_function(x): + await asyncio.sleep(1) + return x * 2 + + +def sync_function(x): + return x * 2 + + +async def main(): + result1 = await call_function(async_function, 10) + result2 = await call_function(sync_function, 10) + print(result1) # Output: 20 + print(result2) # Output: 20 + + +# To run the example +if __name__ == "__main__": + asyncio.run(main()) diff --git a/new-docs/Makefile b/new-docs/Makefile new file mode 100644 index 0000000000..e6d46dcbcc --- /dev/null +++ b/new-docs/Makefile @@ -0,0 +1,192 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = build + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables. 
+PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source + +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " applehelp to make an Apple Help Book" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + @echo " coverage to run coverage check of the documentation (if enabled)" + +clean: + rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Pype9.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Pype9.qhc" + +applehelp: + $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp + @echo + @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." + @echo "N.B. 
You won't be able to view it unless you put it in" \ + "~/Library/Documentation/Help or install it in your application" \ + "bundle." + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/Pype9" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Pype9" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +coverage: + $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage + @echo "Testing of coverage in the sources finished, look at the " \ + "results in $(BUILDDIR)/coverage/python.txt." + +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
diff --git a/new-docs/make.bat b/new-docs/make.bat new file mode 100644 index 0000000000..523fa3eb58 --- /dev/null +++ b/new-docs/make.bat @@ -0,0 +1,263 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source +set I18NSPHINXOPTS=%SPHINXOPTS% source +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^` where ^ is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. xml to make Docutils-native XML files + echo. pseudoxml to make pseudoxml-XML files for display purposes + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + echo. coverage to run coverage check of the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + + +REM Check if sphinx-build is available and fallback to Python version if any +%SPHINXBUILD% 2> nul +if errorlevel 9009 goto sphinx_python +goto sphinx_ok + +:sphinx_python + +set SPHINXBUILD=python -m sphinx.__init__ +%SPHINXBUILD% 2> nul +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +:sphinx_ok + + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. 
+ goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Pype9.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Pype9.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdf" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf + cd %~dp0 + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdfja" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf-ja + cd %~dp0 + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. + goto end +) + +if "%1" == "coverage" ( + %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage + if errorlevel 1 exit /b 1 + echo. + echo.Testing of coverage in the sources finished, look at the ^ +results in %BUILDDIR%/coverage/python.txt. + goto end +) + +if "%1" == "xml" ( + %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. 
The XML files are in %BUILDDIR%/xml. + goto end +) + +if "%1" == "pseudoxml" ( + %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. + goto end +) + +:end diff --git a/new-docs/source/_static/css/custom.css b/new-docs/source/_static/css/custom.css new file mode 100644 index 0000000000..161e475987 --- /dev/null +++ b/new-docs/source/_static/css/custom.css @@ -0,0 +1,4 @@ +div.nbinput .prompt, +div.nboutput .prompt { + display: none; +} diff --git a/new-docs/source/_static/images/nd_spl_1.png b/new-docs/source/_static/images/nd_spl_1.png new file mode 100644 index 0000000000..e4967901dc Binary files /dev/null and b/new-docs/source/_static/images/nd_spl_1.png differ diff --git a/new-docs/source/_static/images/nd_spl_3.png b/new-docs/source/_static/images/nd_spl_3.png new file mode 100644 index 0000000000..e4e95b4e72 Binary files /dev/null and b/new-docs/source/_static/images/nd_spl_3.png differ diff --git a/new-docs/source/_static/images/nd_spl_3_comb1.png b/new-docs/source/_static/images/nd_spl_3_comb1.png new file mode 100644 index 0000000000..dd19db24f9 Binary files /dev/null and b/new-docs/source/_static/images/nd_spl_3_comb1.png differ diff --git a/new-docs/source/_static/images/nd_spl_3_comb3.png b/new-docs/source/_static/images/nd_spl_3_comb3.png new file mode 100644 index 0000000000..b50fad23f5 Binary files /dev/null and b/new-docs/source/_static/images/nd_spl_3_comb3.png differ diff --git a/new-docs/source/_static/images/nd_spl_4.png b/new-docs/source/_static/images/nd_spl_4.png new file mode 100644 index 0000000000..e900bc3298 Binary files /dev/null and b/new-docs/source/_static/images/nd_spl_4.png differ diff --git a/new-docs/source/_static/logo/pydra_logo.jpg b/new-docs/source/_static/logo/pydra_logo.jpg new file mode 100644 index 0000000000..8cdefaad2e Binary files /dev/null and b/new-docs/source/_static/logo/pydra_logo.jpg differ diff --git a/new-docs/source/_static/logo/pydra_logo.png b/new-docs/source/_static/logo/pydra_logo.png new file mode 100644 index 0000000000..d5f2fa2325 Binary files /dev/null and b/new-docs/source/_static/logo/pydra_logo.png differ diff --git a/new-docs/source/_static/logo/pydra_logo.svg b/new-docs/source/_static/logo/pydra_logo.svg new file mode 100644 index 0000000000..05e3daa885 --- /dev/null +++ b/new-docs/source/_static/logo/pydra_logo.svg @@ -0,0 +1,150 @@ + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/new-docs/source/conf.py b/new-docs/source/conf.py new file mode 100644 index 0000000000..683a83a0ba --- /dev/null +++ b/new-docs/source/conf.py @@ -0,0 +1,401 @@ +# -*- coding: utf-8 -*- +# +# Pype9 documentation build configuration file, created by +# sphinx-quickstart on Thu Mar 30 21:41:02 2017. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. +from __future__ import print_function +import typing as ty +import datetime + +from pydra.engine import __version__ # noqa + + +authors = [("Nipype developers", "neuroimaging@python.org")] + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. 
If the directory is relative to the +# documentation root, use op.abspath to make it absolute, like shown here. +# sys.path.insert(0, op.abspath('.')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.doctest", + "sphinx.ext.intersphinx", + "sphinx.ext.todo", + "sphinx.ext.coverage", + "sphinx.ext.mathjax", + "nbsphinx", + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", + "sphinx.ext.autosectionlabel", + "sphinxarg.ext", + "sphinx_click.ext", + "numpydoc", +] + + +nbsphinx_allow_errors = False + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# source_suffix = ['.rst', '.md'] +source_suffix = ".rst" + +# The encoding of source files. +# source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = "index" + +# General information about the project. +project = "Pydra" +author = ", ".join(a for a, _ in authors) +copyright = "{}, {}".format(datetime.datetime.now().year, author) + + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = ".".join(__version__.split(".")[:2]) +# The full version, including alpha/beta/rc tags. +release = __version__ + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +# language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +# today = '' +# Else, today_fmt is used as the format for a strftime call. +# today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns: ty.List[str] = [] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +# default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +# add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +# add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +# show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = "lovelace" +pygments_dark_style = "fruity" + +# A list of ignored prefixes for module index sorting. +# modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +# keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = True + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. 
+html_theme = "furo" + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +html_theme_options = { + "light_css_variables": { + "color-brand-primary": "#69306d", + "color-brand-content": "#69306d", + }, + "dark_css_variables": { + "color-brand-primary": "#ce8dcf", + "color-brand-content": "#ce8dcf", + }, +} + +html_static_path = ["_static"] +html_css_files = ["css/custom.css"] + +# Add any paths that contain custom themes here, relative to this directory. +# html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +html_title = "Pydra v{}".format(version) + +# A shorter title for the navigation bar. Default is the same as html_title. +# html_short_title = 'Pydra v' + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +html_logo = "_static/logo/pydra_logo.png" + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +html_favicon = "_static/logo/pydra_logo.png" + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +# html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +# html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +# html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +# html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +# html_additional_pages = {} + +# If false, no module index is generated. +# html_domain_indices = True + +# If false, no index is generated. +# html_use_index = True + +# If true, the index is split into individual pages for each letter. +# html_split_index = False + +# If true, links to the reST sources are added to the pages. +# html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +# html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +# html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +# html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +# html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. +# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' +# html_search_language = 'en' + +# A dictionary with options for the search language support, empty by default. 
+# Now only 'ja' uses this config value +# html_search_options = {'type': 'default'} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. +# html_search_scorer = 'scorer.js' + +language = "English" + +# Output file base name for HTML help builder. +htmlhelp_basename = "Pydra" + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + "papersize": "a4paper", + # The font size ('10pt', '11pt' or '12pt'). + # 'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + # 'preamble': '', + # Latex figure (float) alignment + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, "pydra.tex", "Pydra Documentation", author, "manual"), +] + +# Autodoc settings +autodoc_default_options = { + "undoc-members": True, + "show-inheritance": True, +} + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +# latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +# latex_use_parts = False + +# If true, show page references after internal links. +# latex_show_pagerefs = False + +# If true, show URL addresses after external links. +# latex_show_urls = False + +# Documents to append as an appendix to all manuals. +# latex_appendices = [] + +# If false, no module index is generated. +# latex_domain_indices = True + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [(master_doc, "pydra", "Pydra Documentation", [author], 1)] + +# If true, show URL addresses after external links. +# man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ( + master_doc, + "Pydra", + "Pydra Documentation", + author, + "Pydra", + "Archive-centered analysis of neuroimaging data", + "Miscellaneous", + ), +] + +# Documents to append as an appendix to all manuals. +# texinfo_appendices = [] + +# If false, no module index is generated. +# texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +# texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +# texinfo_no_detailmenu = False + + +# -- Options for Epub output ---------------------------------------------- + +# Bibliographic Dublin Core info. +epub_title = project +epub_author = author +epub_publisher = author +epub_copyright = copyright + +# The basename for the epub file. It defaults to the project name. +# epub_basename = project + +# The HTML theme for the epub output. Since the default themes are not optimized +# for small screen space, using the same theme for HTML and epub output is +# usually not wise. This defaults to 'epub', a theme designed to save visual +# space. +# epub_theme = 'epub' + +# The language of the text. It defaults to the language option +# or 'en' if the language is not set. 
+# epub_language = '' + +# The scheme of the identifier. Typical schemes are ISBN or URL. +# epub_scheme = '' + +# The unique identifier of the text. This can be a ISBN number +# or the project homepage. +# epub_identifier = '' + +# A unique identification for the text. +# epub_uid = '' + +# A tuple containing the cover image and cover page html template filenames. +# epub_cover = () + +# A sequence of (type, uri, title) tuples for the guide element of content.opf. +# epub_guide = () + +# HTML files that should be inserted before the pages created by sphinx. +# The format is a list of tuples containing the path and title. +# epub_pre_files = [] + +# HTML files that should be inserted after the pages created by sphinx. +# The format is a list of tuples containing the path and title. +# epub_post_files = [] + +# A list of files that should not be packed into the epub file. +epub_exclude_files = ["search.html"] + +# The depth of the table of contents in toc.ncx. +# epub_tocdepth = 3 + +# Allow duplicate toc entries. +# epub_tocdup = True + +# Choose between 'default' and 'includehidden'. +# epub_tocscope = 'default' + +# Fix unsupported image types using the Pillow. +# epub_fix_images = False + +# Scale large images. +# epub_max_image_width = 0 + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +# epub_show_urls = 'inline' + +# If false, no index is generated. +# epub_use_index = True + + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = {"python": ("https://docs.python.org/", None)} + +numpydoc_show_class_members = False diff --git a/new-docs/source/examples/glm.ipynb b/new-docs/source/examples/glm.ipynb new file mode 100644 index 0000000000..73813010de --- /dev/null +++ b/new-docs/source/examples/glm.ipynb @@ -0,0 +1,715 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c8149a94", + "metadata": {}, + "source": [ + "# General Linear Model (GLM)" + ] + }, + { + "cell_type": "markdown", + "id": "b54b132a", + "metadata": {}, + "source": [ + "In this tutorial, which is adapted from the Nilearn docs, we will go through a simple workflow of the first level general linear modeling with a BIDS dataset from openneuro. This analysis is only performed on **one** subject.\n", + "\n", + "This tutorial is based on the [Nilearn GLM tutorial](https://nilearn.github.io/stable/auto_examples/04_glm_first_level/plot_bids_features.html#sphx-glr-auto-examples-04-glm-first-level-plot-bids-features-py)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f514ffe", + "metadata": {}, + "outputs": [], + "source": [ + "import nest_asyncio\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "markdown", + "id": "8313a041", + "metadata": {}, + "source": [ + "## Preparation\n", + "\n", + "Import packages that will be used globally and set up output directory" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72d1dfdd", + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "import sys \n", + "if not sys.warnoptions:\n", + " warnings.simplefilter(\"ignore\")\n", + " \n", + "import os\n", + "import typing as ty\n", + "from pathlib import Path\n", + "\n", + "from pydra.design import python, workflow\n", + "from pydra.engine.submitter import Submitter\n", + "from fileformats.generic import File, Directory\n", + "from fileformats.text import Csv\n", + "import pandas as pd\n", + "from scipy.stats import norm\n", + "\n", + "import nibabel as nib\n", + "# These functions were removed within nilearn, so this notebook needs to be rewritten\n", + "# to use the 'openneuro' module instead\n", + "# from nilearn.datasets import (\n", + "# fetch_openneuro_dataset_index,\n", + "# fetch_openneuro_dataset,\n", + "# select_from_index,\n", + "# )\n", + "from nilearn.interfaces.fsl import get_design_from_fslmat\n", + "from nilearn.glm.first_level import first_level_from_bids\n", + "from nilearn.reporting import get_clusters_table, make_glm_report\n", + "from nilearn.plotting import (\n", + " plot_glass_brain,\n", + " plot_img_comparison,\n", + " plot_contrast_matrix,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5716cb50", + "metadata": {}, + "outputs": [], + "source": [ + "# get current directory\n", + "pydra_tutorial_dir = os.path.dirname(os.getcwd())\n", + "\n", + "# set up output directory\n", + "workflow_dir = Path(pydra_tutorial_dir) / 'outputs'\n", + "workflow_out_dir = workflow_dir / '6_glm'\n", + "\n", + "# create the output directory if not exit\n", + "os.makedirs(workflow_out_dir, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1878928b", + "metadata": {}, + "outputs": [], + "source": [ + "workflow_out_dir" + ] + }, + { + "cell_type": "markdown", + "id": "6cafd6a1", + "metadata": {}, + "source": [ + "## Create tasks\n", + "\n", + "In this section, we converte major steps into tasks.\n", + "Each pydra task can have multiple python functions. We recommend to put those logically more related functions into the same task.\n", + "\n", + "It is very **important** to keep in mind what adjacent tasks of your current task will be.\n", + "1. Your previous task will decide your arguments in the current task\n", + "2. Your next task will be impacted by the returns in the current task" + ] + }, + { + "cell_type": "markdown", + "id": "823780ab", + "metadata": {}, + "source": [ + "### fetch openneuro BIDS dataset\n", + "\n", + "In this task, we do the following:\n", + "1. get openneuro dataset index\n", + "2. specify exclusion patterns and number of subjects\n", + "3. download the data we need\n", + "\n", + "\n", + "**Notes:** Here we still use `n_subjects` as an argument. Given that we will only analyze one subject, you can also remove this argument and specify `n_subjects =1` in `select_from_index`. If you do, do not forget to modify the argument in the workflow later." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2ab134c", + "metadata": {}, + "outputs": [], + "source": [ + "@python.define(outputs=[\"data_dir\"])\n", + "def GetOpenneuroDataset(exclusion_patterns: list, n_subjects: int) -> str:\n", + " _, urls = fetch_openneuro_dataset_index()\n", + " urls = select_from_index(\n", + " urls, exclusion_filters=exclusion_patterns, n_subjects=n_subjects\n", + " )\n", + " data_dir, _ = fetch_openneuro_dataset(urls=urls)\n", + " return data_dir" + ] + }, + { + "cell_type": "markdown", + "id": "1b4899de", + "metadata": {}, + "source": [ + "### obtain FirstLevelModel objects automatically and fit arguments\n", + "\n", + "To get the first level model(s) we have to specify\n", + "1. the dataset directory\n", + "2. the task_label\n", + "3. the space_label\n", + "4. the folder with the desired derivatives (fMRIPrep)\n", + "\n", + "In our case, we only have one subject so we will only have one first level model.\n", + "Then, for this model, we will obtain\n", + "1. the list of run images\n", + "2. events\n", + "3. confound regressors\n", + "\n", + "Those are inferred from the confounds.tsv files available in the BIDS dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c2710dc", + "metadata": {}, + "outputs": [], + "source": [ + "@python.define(outputs=[\"model\", \"imgs\", \"subject\"])\n", + "def GetInfoFromBids(\n", + " data_dir: Directory,\n", + " task_label: str,\n", + " space_label: str,\n", + " smoothing_fwhm: float,\n", + " derivatives_folder: Directory,\n", + ") -> ty.Tuple[ty.Any, list, str]:\n", + " (\n", + " models,\n", + " models_run_imgs,\n", + " models_events,\n", + " models_confounds,\n", + " ) = first_level_from_bids(\n", + " dataset_path=data_dir,\n", + " task_label=task_label,\n", + " space_label=space_label,\n", + " smoothing_fwhm=smoothing_fwhm,\n", + " derivatives_folder=derivatives_folder,\n", + " )\n", + " model, imgs, events, confounds = (\n", + " models[0],\n", + " models_run_imgs[0],\n", + " models_events[0],\n", + " models_confounds[0],\n", + " )\n", + " subject = 'sub-' + model.subject_label\n", + " return model, imgs, subject" + ] + }, + { + "cell_type": "markdown", + "id": "e5af99cb", + "metadata": {}, + "source": [ + "### Get design matrix\n", + "\n", + "This task does the following:\n", + "1. read the design matrix in `.mat`\n", + "2. rename the column\n", + "3. save the new design matrix as `.csv`\n", + "\n", + "**Think:** What if we don't save the new design matrix, but `return` it directly? In other words, we `return` a `pandas.DataFrame` instead of a `path`. What will happen? 
Worth a try :)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2bdfcfd9", + "metadata": {}, + "outputs": [], + "source": [ + "@python.define(outputs=[\"dm_path\"])\n", + "def GetDesignMatrix(data_dir: Directory, subject: str) -> Csv:\n", + " fsl_design_matrix_path = data_dir.joinpath(\n", + " 'derivatives',\n", + " 'task',\n", + " subject,\n", + " 'stopsignal.feat',\n", + " 'design.mat',\n", + " )\n", + " design_matrix = get_design_from_fslmat(\n", + " fsl_design_matrix_path, column_names=None\n", + " )\n", + "\n", + " design_columns = [\n", + " 'cond_%02d' % i for i in range(len(design_matrix.columns))\n", + " ]\n", + " design_columns[0] = 'Go'\n", + " design_columns[4] = 'StopSuccess'\n", + " design_matrix.columns = design_columns\n", + " dm_path = Path('designmatrix.csv')\n", + " design_matrix.to_csv(dm_path, index=None)\n", + " return dm_path" + ] + }, + { + "cell_type": "markdown", + "id": "e1cb37d0", + "metadata": {}, + "source": [ + "### Fit the first level model\n", + "\n", + "What we are doing here is:\n", + "1. use the design matrix to fit the first level model\n", + "2. compute the contrast\n", + "3. save the z_map and masker for further use\n", + "4. generate a glm report (HTML file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "65cec504", + "metadata": {}, + "outputs": [], + "source": [ + "@python.define(outputs=[\"model\", \"z_map_path\", \"masker\", \"glm_report_file\"])\n", + "def ModelFit(model, imgs, dm_path, contrast: str) -> ty.Tuple[ty.Any, str, ty.Any, str]:\n", + " design_matrix = pd.read_csv(dm_path)\n", + " model.fit(imgs, design_matrices=[design_matrix])\n", + " z_map = model.compute_contrast(contrast)\n", + " z_map_path = Path('firstlevel_z_map.nii.gz')\n", + " z_map.to_filename(z_map_path)\n", + " masker_path = Path('firstlevel_masker.nii.gz')\n", + " masker = model.masker_\n", + " glm_report_file = Path('glm_report.html')\n", + " report = make_glm_report(model, contrast)\n", + " report.save_as_html(glm_report_file)\n", + " return model, z_map_path, masker, glm_report_file" + ] + }, + { + "cell_type": "markdown", + "id": "05576ba4", + "metadata": {}, + "source": [ + "### Get cluster table\n", + "\n", + "For publication purposes, we obtain a cluster table." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4a86a6f", + "metadata": {}, + "outputs": [], + "source": [ + "@python.define(outputs=[\"output_file\"])\n", + "def ClusterTable(z_map_path: File) -> Csv:\n", + " stat_img = nib.load(z_map_path)\n", + " output_file = Path('cluster_table.csv')\n", + " df = get_clusters_table(\n", + " stat_img, stat_threshold=norm.isf(0.001), cluster_threshold=10\n", + " )\n", + " df.to_csv(output_file, index=None)\n", + " return output_file" + ] + }, + { + "cell_type": "markdown", + "id": "c1e8effd", + "metadata": {}, + "source": [ + "### Make plots\n", + "\n", + "Here we want to make some plots to display our results and compare the result from FSL.\n", + "1. plot nilearn z-map\n", + "2. plot fsl z-map\n", + "3. plot nilearn and fsl comparison\n", + "4. plot design matrix contrast\n", + "\n", + "You can also separate this task into multiple sub-tasks. But it makes more sense to put them into one task as they use the same files and function `nilearn.plotting` repeatedly." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0f78107", + "metadata": {}, + "outputs": [], + "source": [ + "@python.define(outputs=[\"output_file1\", \"output_file2\", \"output_file3\", \"output_file4\"])\n", + "def Plots(\n", + " data_dir: Directory,\n", + " dm_path: File,\n", + " z_map_path: File,\n", + " contrast: str,\n", + " subject: str,\n", + " masker\n", + ") -> ty.Tuple[str, str, str, str]:\n", + " # plot and save nilearn z-map\n", + " z_map = nib.load(z_map_path)\n", + " output_file1 = Path('nilearn_z_map.jpg')\n", + " plot_glass_brain(\n", + " z_map,\n", + " output_file=output_file1,\n", + " colorbar=True,\n", + " threshold=norm.isf(0.001),\n", + " title='Nilearn Z map of \"StopSuccess - Go\" (unc p<0.001)',\n", + " plot_abs=False,\n", + " display_mode='ortho',\n", + " )\n", + "\n", + " # plot and save fsl z-map\n", + " fsl_z_map = nib.load(\n", + " os.path.join(\n", + " data_dir,\n", + " 'derivatives',\n", + " 'task',\n", + " subject,\n", + " 'stopsignal.feat',\n", + " 'stats',\n", + " 'zstat12.nii.gz',\n", + " )\n", + " )\n", + " output_file2 = Path('fsl_z_map.jpg')\n", + " plot_glass_brain(\n", + " fsl_z_map,\n", + " output_file=output_file2,\n", + " colorbar=True,\n", + " threshold=norm.isf(0.001),\n", + " title='FSL Z map of \"StopSuccess - Go\" (unc p<0.001)',\n", + " plot_abs=False,\n", + " display_mode='ortho',\n", + " )\n", + "\n", + " # plot and save nilearn and fsl comparison\n", + " plot_img_comparison(\n", + " [z_map],\n", + " [fsl_z_map],\n", + " masker,\n", + " output_dir=workflow_out_dir,\n", + " ref_label='Nilearn',\n", + " src_label='FSL',\n", + " )\n", + " old = Path('0000.png')\n", + " new = Path('nilearn_fsl_comp.jpg')\n", + " os.rename(old, new)\n", + " output_file3 = new\n", + " print(output_file3)\n", + "\n", + " # plot and save design matrix contrast\n", + " design_matrix = pd.read_csv(dm_path)\n", + " output_file4 = Path('firstlevel_contrast.jpg')\n", + " plot_contrast_matrix(contrast, design_matrix, output_file=output_file4)\n", + " return output_file1, output_file2, output_file3, output_file4" + ] + }, + { + "cell_type": "markdown", + "id": "12a99b96", + "metadata": {}, + "source": [ + "## Make a workflow from tasks\n", + "\n", + "Now we have created all tasks we need for this first level analysis, and there are two choices for our next step.\n", + "1. create one workflow to connect all tasks together\n", + "2. create sub-workflows with some closely related tasks, and connect these workflows along with other tasks into a larger workflow.\n", + "\n", + "We recommend the second approach as it is always a good practice to group tasks, especially when there are a large number of tasks in the analysis.\n", + "\n", + "Our analysis can be divided into three parts: (1) get/read the data, (2) analyze the data, and (3) plot the result, where (1) and (3) only have one task each. So we can put all tasks in (2) into one workflow and name it as `firstlevel` or whatever you prefer." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e79e9b1", + "metadata": {}, + "outputs": [], + "source": [ + "@workflow.define(outputs=[\"z_map\", \"masker\", \"subject\", \"dm_path\", \"cluster_table\", \"glm_report\"])\n", + "def FirstLevelWorkflow(\n", + " data_dir: Directory,\n", + " contrast: str,\n", + " output_dir: Path,\n", + " task_label: str = 'stopsignal',\n", + " space_label: str = 'MNI152NLin2009cAsym',\n", + " derivatives_folder: str = 'derivatives/fmriprep',\n", + " smoothing_fwhm: float = 5.0,\n", + ") -> ty.Tuple[str, str, str, File, str, str]:\n", + "\n", + " # add task - get_info_from_bids\n", + " get_info_from_bids = workflow.add(\n", + " GetInfoFromBids(\n", + " data_dir=data_dir,\n", + " task_label=task_label,\n", + " space_label=space_label,\n", + " derivatives_folder=derivatives_folder,\n", + " smoothing_fwhm=smoothing_fwhm,\n", + " )\n", + " )\n", + " # add task - get_designmatrix\n", + " get_designmatrix = workflow.add(\n", + " GetDesignMatrix(\n", + " data_dir=data_dir,\n", + " subject=get_info_from_bids.subject,\n", + " )\n", + " )\n", + " l1estimation = workflow.add(\n", + " ModelFit(\n", + " model=get_info_from_bids.model,\n", + " imgs=get_info_from_bids.imgs,\n", + " dm_path=get_designmatrix.dm_path,\n", + " contrast=contrast,\n", + " )\n", + " )\n", + " # add task - cluster_table\n", + " cluster_table = workflow.add(\n", + " ClusterTable(\n", + " z_map_path=l1estimation.z_map_path,\n", + " )\n", + " )\n", + " # specify output\n", + " return (\n", + " l1estimation.z_map_path,\n", + " l1estimation.masker,\n", + " get_info_from_bids.subject,\n", + " get_designmatrix.dm_path,\n", + " cluster_table.output_file,\n", + " l1estimation.glm_report_file,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "657690ea", + "metadata": {}, + "source": [ + "## The overaching workflow\n", + "\n", + "Connect other tasks and the above workflow into one\n", + "\n", + "Now we need to create the overaching glm workflow that connects the above workflow and other tasks (e.g., `get/read the data` and `plot the result`)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d055c5d0", + "metadata": {}, + "outputs": [], + "source": [ + "@workflow.define(outputs=[\"output1\", \"output2\", \"output3\", \"output4\"])\n", + "def FullWorkflow(\n", + " output_dir: Path,\n", + " n_subjects: int = 1,\n", + " contrast: str = 'StopSuccess - Go',\n", + " exclusion_patterns: list[str] | None = None,\n", + ") -> tuple[ty.Any, ty.Any, ty.Any, ty.Any]:\n", + " if exclusion_patterns is None:\n", + " exclusion_patterns = [\n", + " '*group*',\n", + " '*phenotype*',\n", + " '*mriqc*',\n", + " '*parameter_plots*',\n", + " '*physio_plots*',\n", + " '*space-fsaverage*',\n", + " '*space-T1w*',\n", + " '*dwi*',\n", + " '*beh*',\n", + " '*task-bart*',\n", + " '*task-rest*',\n", + " '*task-scap*',\n", + " '*task-task*',\n", + " ]\n", + "\n", + " get_openneuro_dataset = workflow.add(\n", + " GetOpenneuroDataset(\n", + " exclusion_patterns=exclusion_patterns,\n", + " n_subjects=n_subjects,\n", + " )\n", + " )\n", + "\n", + " wf_firstlevel = workflow.add(\n", + " FirstLevelWorkflow(\n", + " data_dir=get_openneuro_dataset.data_dir,\n", + " contrast=contrast,\n", + " output_dir=output_dir,\n", + " )\n", + " )\n", + "\n", + " plots = workflow.add(\n", + " Plots(\n", + " data_dir=get_openneuro_dataset.data_dir,\n", + " dm_path=wf_firstlevel.dm_path,\n", + " z_map_path=wf_firstlevel.z_map,\n", + " contrast=contrast,\n", + " subject=wf_firstlevel.subject,\n", + " 
masker=wf_firstlevel.masker,\n", + " )\n", + " )\n", + "\n", + " return (\n", + " plots.output_file1,\n", + " plots.output_file2,\n", + " plots.output_file3,\n", + " plots.output_file4,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "1b2e9a46", + "metadata": {}, + "source": [ + "## Run Workflow Run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a90088e", + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [], + "source": [ + "wf = FullWorkflow(output_dir=workflow_out_dir, n_subjects=1, contrast='StopSuccess - Go')\n", + "\n", + "if __name__ == \"__main__\":\n", + " with Submitter(worker='cf', n_procs=4) as sub:\n", + " results = sub(wf)\n", + "\n", + " print(results)" + ] + }, + { + "cell_type": "markdown", + "id": "f540cdd4", + "metadata": {}, + "source": [ + "## Visualization" + ] + }, + { + "cell_type": "markdown", + "id": "e8def869", + "metadata": {}, + "source": [ + "If you arrive here without any errors, yay, you just made your first pydra workflow for a first-level GLM!" + ] + }, + { + "cell_type": "markdown", + "id": "9b0585e3", + "metadata": {}, + "source": [ + "## Examine folder structure\n", + "\n", + "Let's take a look at what you have got." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75c1cfc9", + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [], + "source": [ + "! ls ../outputs/6_glm" + ] + }, + { + "cell_type": "markdown", + "id": "56aeee0c", + "metadata": {}, + "source": [ + "### Plot figures" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f657571", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "from IPython.display import Image\n", + "\n", + "\n", + "if not results.errored:\n", + " # First-level contrast\n", + " Image(filename='../outputs/6_glm/firstlevel_contrast.jpg')\n", + "\n", + " # Nilearn Z map\n", + " Image(filename='../outputs/6_glm/nilearn_z_map.jpg')\n", + "\n", + " # FSL Z map\n", + " Image(filename='../outputs/6_glm/fsl_z_map.jpg')\n", + "\n", + " # Nilearn and FSL comparison\n", + " Image(filename='../outputs/6_glm/nilearn_fsl_comp.jpg')" + ] + }, + { + "cell_type": "markdown", + "id": "081bf13a", + "metadata": {}, + "source": [ + "## Exercise" + ] + }, + { + "cell_type": "markdown", + "id": "a3d55272", + "metadata": {}, + "source": [ + "What if we need to run the first-level GLM on multiple subject? We will need the `splitter`.\n", + "\n", + "So, where should we add `.split`?" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/new-docs/source/examples/t1w-preprocess.ipynb b/new-docs/source/examples/t1w-preprocess.ipynb new file mode 100644 index 0000000000..3c1271d26a --- /dev/null +++ b/new-docs/source/examples/t1w-preprocess.ipynb @@ -0,0 +1,46 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# T1w MRI preprocessing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an real-world example of a workflow to pre-process T1-weighted MRI images for further analysis\n", + "\n", + "Work in progress..." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/new-docs/source/explanation/conditional-lazy.rst b/new-docs/source/explanation/conditional-lazy.rst new file mode 100644 index 0000000000..85178a6653 --- /dev/null +++ b/new-docs/source/explanation/conditional-lazy.rst @@ -0,0 +1,37 @@ +Dynamic construction +==================== + +Pydra workflows are constructed dynamically by workflow "constructor" functions. These +functions can use any valid Python code, allowing rich and complex workflows to be +constructed based on the inputs to the workflow. For example, a workflow constructor +could include conditional branches, loops, or other control flow structures, to tailor +the workflow to the specific inputs provided. + + +Lazy fields +----------- + +Pydra workflows are constructed by the assignment of "lazy field" placeholders from +the outputs of upstream nodes to the inputs of downstream nodes. These placeholders, +which are instances of the :class:`pydra.engine.specs.LazyField` class, are replaced +by the actual values they represent when the workflow is run. + + +Caching of workflow construction +-------------------------------- + +Workflows are constructed just before they are executed to produce a Directed Acyclic Graph +(DAG) of nodes. Tasks are generated from these nodes as upstream inputs become available +and added to the execution stack. If the workflow has been split, either at the top-level, +in an upstream node or at the current node, then a separate task will be generated for +split. + + +Nested workflows and lazy conditionals +-------------------------------------- + +Since lazy fields are only evaluated at runtime, they can't be used in conditional +statements that construct the workflow. However, if there is a section of a workflow +that needs to be conditionally included or excluded based on upstream outputs, that +section can be implemented in a nested workflow and that upstream be connected to the +nested workflow. diff --git a/new-docs/source/explanation/design-approach.rst b/new-docs/source/explanation/design-approach.rst new file mode 100644 index 0000000000..07c94226f1 --- /dev/null +++ b/new-docs/source/explanation/design-approach.rst @@ -0,0 +1,75 @@ + +Design philosophy +================= + +Rationale +--------- + +Scientific workflows often require sophisticated analyses that encompass a large collection +of algorithms. +The algorithms, that were originally not necessarily designed to work together, +and were written by different authors. +Some may be written in Python, while others might require calling external programs. +It is a common practice to create semi-manual workflows that require the scientists +to handle the files and interact with partial results from algorithms and external tools. +This approach is conceptually simple and easy to implement, but the resulting workflow +is often time consuming, error-prone and difficult to share with others. +Consistency, reproducibility and scalability demand scientific workflows +to be organized into fully automated pipelines. 
+This was the motivation behind Pydra - a new dataflow engine written in Python. + +History +------- + +The Pydra package is a part of the second generation of the Nipype_ ecosystem +--- an open-source framework that provides a uniform interface to existing neuroimaging +software and facilitates interaction between different software components. +The Nipype project was born in the neuroimaging community, and has been helping scientists +build workflows for a decade, providing a uniform interface to such neuroimaging packages +as FSL_, ANTs_, AFNI_, FreeSurfer_ and SPM_. +This flexibility has made it an ideal basis for popular preprocessing tools, +such as fMRIPrep_ and C-PAC_. +The second generation of Nipype ecosystem is meant to provide additional flexibility +and is being developed with reproducibility, ease of use, and scalability in mind. +Pydra itself is a standalone project and is designed as a general-purpose dataflow engine +to support any scientific domain. + +Goals +----- + +The goal of Pydra is to provide a lightweight dataflow engine for computational graph construction, +manipulation, and distributed execution, as well as ensuring reproducibility of scientific pipelines. +In Pydra, a dataflow is represented as a directed acyclic graph, where each node represents a Python +function, execution of an external tool, or another reusable dataflow. +The combination of several key features makes Pydra a customizable and powerful dataflow engine: + +- Composable dataflows: Any node of a dataflow graph can be another dataflow, allowing for nested + dataflows of arbitrary depths and encouraging creating reusable dataflows. + +- Flexible semantics for creating nested loops over input sets: Any Task or dataflow can be run + over input parameter sets and the outputs can be recombined (similar concept to Map-Reduce_ model, + but Pydra extends this to graphs with nested dataflows). + +- A content-addressable global cache: Hash values are computed for each graph and each Task. + This supports reusing of previously computed and stored dataflows and Tasks. + +- Support for Python functions and external (shell) commands: Pydra can decorate and use existing + functions in Python libraries alongside external command line tools, allowing easy integration + of existing code and software. + +- Native container execution support: Any dataflow or Task can be executed in an associated container + (via Docker or Singularity) enabling greater consistency for reproducibility. + +- Auditing and provenance tracking: Pydra provides a simple JSON-LD-based message passing mechanism + to capture the dataflow execution activities as a provenance graph. These messages track inputs + and outputs of each task in a dataflow, and the resources consumed by the task. + +.. _Nipype: https://nipype.readthedocs.io/en/latest/ +.. _FSL: https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/FSL +.. _ANTs: http://stnava.github.io/ANTs/ +.. _AFNI: https://afni.nimh.nih.gov/ +.. _FreeSurfer: https://surfer.nmr.mgh.harvard.edu/ +.. _SPM: https://www.fil.ion.ucl.ac.uk/spm/ +.. _fMRIPrep: https://fmriprep.org/en/stable/ +.. _C-PAC: https://fcp-indi.github.io/docs/latest/index +.. 
_Map-Reduce: https://en.wikipedia.org/wiki/MapReduce diff --git a/new-docs/source/explanation/environments.rst b/new-docs/source/explanation/environments.rst new file mode 100644 index 0000000000..f5f1009c00 --- /dev/null +++ b/new-docs/source/explanation/environments.rst @@ -0,0 +1,33 @@ +Software environments
+=====================
+
+Pydra supports running tasks within encapsulated software environments, such as Docker_
+and Singularity_ containers. This can be specified at runtime or during workflow
+construction, and allows tasks to be run in environments that are isolated from the
+host system, and that have specific software dependencies.
+
+The environment a task runs within is specified by the ``environment`` argument passed
+to the execution call (e.g. ``my_task(worker="cf", environment="docker")``) or in the
+``workflow.add()`` call in workflow constructors.
+
+Specifying at execution
+-----------------------
+
+Work in progress...
+
+
+Specifying at workflow construction
+-----------------------------------
+
+Work in progress...
+
+
+
+Implementing new environment types
+----------------------------------
+
+Work in progress...
+
+
+.. _Docker: https://www.docker.com/
+.. _Singularity: https://sylabs.io/singularity/ diff --git a/new-docs/source/explanation/hashing-caching.rst b/new-docs/source/explanation/hashing-caching.rst new file mode 100644 index 0000000000..3edbd434f5 --- /dev/null +++ b/new-docs/source/explanation/hashing-caching.rst @@ -0,0 +1,61 @@ +Caches and hashes
+=================
+
+In Pydra, each task is run within its own working directory. If a task completes
+successfully, its outputs are stored within this working directory. Working directories
+are created within a cache directory, which is specified when the task is executed, and
+are named according to the hash of the task's inputs. This means that if the same task is
+executed with the same inputs, the same working directory will be used, and instead of the task
+being rerun, the outputs from the previous run will be reused.
+
+In this manner, incomplete workflows can be resumed from where they left off, and completed
+workflows can be rerun without having to rerun all of the tasks. This is particularly useful
+when working with datasets that are to be analysed in several different ways with
+common intermediate steps, or when debugging workflows that have failed part way through.
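+
+For example, using the execution API shown in the tutorials, running the same task twice
+against the same cache directory only executes it once; the second call finds the existing
+working directory and reuses its outputs. The following is a minimal sketch re-using the
+``TenToThePower`` task from the ``pydra.tasks.testing`` package (the cache location is
+arbitrary):
+
+.. code-block:: python
+
+    from pydra.engine.submitter import Submitter
+    from pydra.tasks.testing import TenToThePower
+
+    ten_to_the_power = TenToThePower(p=3)
+
+    with Submitter(cache_dir="/tmp/pydra-cache") as submitter:
+        result1 = submitter(ten_to_the_power)  # executed; outputs written to the cache
+
+    # A second run with identical inputs hashes to the same working directory, so the
+    # stored outputs are reused rather than recomputed
+    with Submitter(cache_dir="/tmp/pydra-cache") as submitter:
+        result2 = submitter(ten_to_the_power)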
+
+
+Hash calculations
+-----------------
+
+Hashes are calculated for different types of objects in different ways. For example, the
+hash of a string is simply the hash of the string itself, whereas the hash of a directory
+is computed from the names and contents of the files within it. Implementations for
+most common types are provided in the :mod:`pydra.utils.hash` module, but custom types
+can be hashed by providing a custom ``bytes_repr`` function (see
+:ref:`Registering custom bytes_repr functions`).
+
+A cache dictionary, which maps an object's id (i.e. as returned by the built-in ``id()``
+function) to its hash, is passed to each ``bytes_repr`` call to avoid infinite recursion
+in the case of circular references.
+
+The byte representation of each object is hashed using the BLAKE2b cryptographic algorithm,
+and these hashes are then combined to create a hash of the entire inputs object.
+
+
+File hash caching by mtime
+--------------------------
+
+To avoid having to recalculate the hash of large files between runs, file hashes themselves
+are cached in a platform-specific user directory. These hashes are stored within small
+files named by yet another hash of the file-system path and the mtime of the file. This means
+that the contents of a file should only need to be hashed once unless it is modified.
+
+.. note::
+
+   Due to limitations in mtime resolution on different platforms (e.g. 1 second on Linux,
+   potentially 2 seconds on Windows), it is conceivable that a file could be modified,
+   hashed, and then modified again within the resolution period, causing the hash to be
+   invalid. Therefore, cached hashes are only used once the mtime resolution period
+   has elapsed since the file was last modified, and may be recalculated in some rare cases.
+
+
+Registering custom bytes_repr functions
+---------------------------------------
+
+Work in progress...
+
+
+Cache misses due to unstable hashes
+-----------------------------------
+
+Work in progress... diff --git a/new-docs/source/explanation/splitting-combining.rst b/new-docs/source/explanation/splitting-combining.rst new file mode 100644 index 0000000000..906a51443c --- /dev/null +++ b/new-docs/source/explanation/splitting-combining.rst @@ -0,0 +1,89 @@ +Splitting and combining
+=======================
+
+One of the main goals of creating Pydra was to support flexible evaluation of a Task or a Workflow
+over combinations of input parameters.
+This is the key feature that distinguishes it from most other dataflow engines.
+This is similar to the concept of Map-Reduce_, but extends it to work over arbitrary nested graphs.
+In complex dataflows, this would typically involve significant overhead for data management
+and the use of multiple nested loops.
+In Pydra, this is controlled by setting specific State-related attributes through Task methods.
+In order to set input splitting (or mapping), Pydra requires setting up a splitter.
+This is done using the Task's ``split`` method.
+The simplest example would be a Task that has one field x in the input, and therefore there
+is only one way of splitting its input.
+Assuming that the user provides a list as a value of x, Pydra splits the list, so each copy
+of the Task will get one element of the list.
+This can be represented as follows:
+
+.. math::
+
+   S = x: x=[x_1, x_2, ..., x_n] \longmapsto x=x_1, x=x_2, ..., x=x_n~,
+
+where S represents the splitter, and x is the input field.
+This is also represented in the diagram, where :math:`x=[1, 2, 3]` is used as an example, and the coloured
+nodes represent stateless copies of the original Task after splitting the input
+(these are the runnables that are executed).
+
+.. figure:: ../_static/images/nd_spl_1.png
+   :figclass: h!
+   :scale: 50 %
+
+Types of Splitter
+-----------------
+Whenever a *Task* has more complicated inputs,
+i.e. multiple fields, there are two ways of creating the mapping,
+each one suited to a different application.
+These *splitters* are called the *scalar splitter* and the *outer splitter*.
+They use a special, but Python-based, syntax as described next.
+
+Scalar Splitter
+---------------
+A *scalar splitter* performs element-wise mapping and requires the lists of
+values for two or more fields to have the same length. The *scalar splitter* uses
+Python tuples and its operation is therefore represented by parentheses, ``()``:
+
+.. math::
+
+   S = (x, y) : x=[x_1, x_2, .., x_n],~y=[y_1, y_2, .., y_n] \mapsto (x, y)=(x_1, y_1),..., (x, y)=(x_n, y_n),
+
+
+where `S` represents the *splitter*, and `x` and `y` are the input fields.
+This is also represented as a diagram:
+
+.. figure:: ../_static/images/nd_spl_4.png
+   :figclass: h!
+   :scale: 80%
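+
+In code, this corresponds to passing a tuple of field names to the ``split()`` method, as
+shown in the Getting started tutorial. The following is a minimal sketch re-using the
+``MrGrid`` task from the ``pydra-mrtrix3`` package (the file paths are placeholders):
+
+.. code-block:: python
+
+    from pydra.tasks.mrtrix3.v3_0 import MrGrid
+
+    # Element-wise (scalar) split: the i-th image is paired with the i-th voxel size
+    mrgrid = MrGrid(operation="regrid").split(
+        ("in_file", "voxel"),
+        in_file=["a.nii", "b.nii", "c.nii"],
+        voxel=[(0.5, 0.5, 0.5), (0.75, 0.75, 0.75), (1.0, 1.0, 1.0)],
+    )
+
+    outputs = mrgrid()  # three task copies are run, one per (in_file, voxel) pair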
+
+
+Outer Splitter
+--------------
+
+The second option for mapping the input, when there are multiple fields, is
+provided by the *outer splitter*. The *outer splitter* creates all combinations
+of the input values and does not require the lists to have the same lengths.
+The *outer splitter* uses Python's list syntax and is represented by square
+brackets, ``[]``:
+
+.. math::
+
+   S = [x, y] &:& x=[x_1, x_2, ..., x_n],~~ y=[y_1, y_2, ..., y_m], \\
+    &\mapsto& (x, y)=(x_1, y_1), (x, y)=(x_1, y_2)..., (x, y)=(x_n, y_m).
+
+
+The *outer splitter* for a node with two input fields is schematically represented in the diagram:
+
+.. figure:: ../_static/images/nd_spl_3.png
+   :figclass: h!
+   :scale: 80%
+
+
+Different types of splitters can be combined over inputs such as
+`[inp1, (inp2, inp3)]`. In this example an *outer splitter* provides all
+combinations of values of `inp1` with pairwise combinations of values of `inp2`
+and `inp3`. This can be extended to arbitrary complexity.
+In addition, the outputs can be merged at the end if needed.
+This will be explained in the next section.
+
+
+.. _Map-Reduce: https://en.wikipedia.org/wiki/MapReduce diff --git a/new-docs/source/explanation/typing.rst b/new-docs/source/explanation/typing.rst new file mode 100644 index 0000000000..82d47c7d35 --- /dev/null +++ b/new-docs/source/explanation/typing.rst @@ -0,0 +1,78 @@ +Typing and file-formats
+=======================
+
+Pydra implements strong(-ish) type-checking at workflow construction time so that some errors
+can be caught before workflows are run on potentially expensive computing resources.
+Input and output fields of tasks can be typed using Python annotations.
+Unlike how they are typically used, in Pydra these type annotations are not just for
+documentation and linting purposes, but are used to enforce the types of the inputs
+and outputs of tasks and workflows at workflow construction and runtime.
+
+.. note::
+
+    With the exception of fields containing file-system paths, which should be typed
+    as a FileFormats_ class, types don't need to be specified if not desired.
+
+File formats
+------------
+
+The FileFormats_ package provides a way to specify the format of a file, or set of
+files, via an extensible collection of file-format classes. These classes can be
+used to specify the format of a file in a task input or output, and can be used
+to validate the format of a file at runtime.
+
+It is important to use a FileFormats_ type instead of a ``str`` or ``pathlib.Path``
+when defining a field that takes paths to file-system objects, because otherwise only
+the file path, not the file contents, will be used in the hash used to locate the cache
+(see :ref:`Caches and hashes`). However, in most cases, it is sufficient to use the
+generic ``fileformats.generic.File``, ``fileformats.generic.Directory``, or the even
+more generic ``fileformats.generic.FsObject`` or ``fileformats.generic.FileSet`` classes.
+
+The only case where it isn't sufficient to use generic classes is when there are
+implicit headers or sidecar files assumed to be present adjacent to the primary file (e.g.
+a NIfTI file `my_nifti.nii` with an associated JSON sidecar file `my_nifti.json`).
+This is because the header/sidecar file(s) will not be included in the hash calculation
+by default, and may be omitted if the "file set" is copied into a different working
+directory. In such cases, a specific file format class, such as
+``fileformats.medimage.NiftiGzX``, should be used instead.
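+
+To make this concrete, the following is a minimal sketch of a Python task (using the
+``pydra.design.python`` API shown elsewhere in these docs) whose fields are annotated with
+FileFormats_ classes; the task name and its logic are purely illustrative:
+
+.. code-block:: python
+
+    from pathlib import Path
+    from fileformats.generic import File
+    from fileformats.text import Csv
+    from pydra.design import python
+
+    @python.define(outputs=["summary"])
+    def FileSize(in_file: File) -> Csv:
+        """Write the size of ``in_file`` to a one-row CSV file."""
+        out_path = Path("size.csv")
+        out_path.write_text(f"path,size\n{in_file},{Path(in_file).stat().st_size}\n")
+        return out_path
+
+Because ``in_file`` is typed as ``File`` rather than ``str``, the contents of the file, not
+just its path, contribute to the input hash that is used to locate the cache.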
+ +Coercion +-------- + +Pydra will attempt to coerce the input to the correct type if it is not already, for example +if a tuple is provided to a field that is typed as a list, Pydra will convert the tuple to a list +before the task is run. By default the following coercions will be automatically +applied between the following types: + +* ty.Sequence → ty.Sequence +* ty.Mapping → ty.Mapping +* Path → os.PathLike +* str → os.PathLike +* os.PathLike → Path +* os.PathLike → str +* ty.Any → MultiInputObj +* int → float +* field.Integer → float +* int → field.Decimal + +In addition to this, ``fileformats.fields.Singular`` (see FileFormats_) +can be coerced to and from their primitive types and Numpy ndarrays and primitive types +can be coerced to and from Python sequences and built-in types, respectively. + +Superclass auto-casting +----------------------- + +Pydra is designed so that strict and specific typing can be used, but is not +unnecessarily strict, if it proves too burdensome. Therefore, upstream fields that are +typed as super classes (or as ``typing.Any`` by default) of the task input they are +connected to will be automatically cast to the subclass when the task is run. +This allows workflows and tasks to be easily connected together +regardless of how specific typing is defined in the task definition. This includes +file format types, so a task that expects a ``fileformats.medimage.NiftiGz`` file can +be connected to a task that outputs a ``fileformats.generic.File`` file. +Therefore, the only cases where a typing error will be raised are when the upstream +field can't be cast or coered to the downstream field, e.g. a ``fileformats.medimage.DicomSeries`` +cannot be cast to a ``fileformats.medimage.Nifti`` file. + + +.. _FileFormats: https://arcanaframework.github.io/fileformats diff --git a/new-docs/source/howto/create-task-package.ipynb b/new-docs/source/howto/create-task-package.ipynb new file mode 100644 index 0000000000..6b454fbae2 --- /dev/null +++ b/new-docs/source/howto/create-task-package.ipynb @@ -0,0 +1,25 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create a task package\n", + "\n", + "Work in progress..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/new-docs/source/howto/port-from-nipype.ipynb b/new-docs/source/howto/port-from-nipype.ipynb new file mode 100644 index 0000000000..ba228e387c --- /dev/null +++ b/new-docs/source/howto/port-from-nipype.ipynb @@ -0,0 +1,25 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Port interfaces from Nipype\n", + "\n", + "Work in progress..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/new-docs/source/index.rst b/new-docs/source/index.rst new file mode 100644 index 0000000000..65ab02e132 --- /dev/null +++ b/new-docs/source/index.rst @@ -0,0 +1,181 @@ +.. _home: + +Pydra +===== + +Pydra is a lightweight dataflow engine written in Python. Although designed to succeed +Nipype_ in order to address the needs of the neuroimaging community, Pydra can be used +for analytics in any scientific domain. 
Pydra facilitates the design of reproducible, +scalable and robust workflows that can link diverse processing tasks implemented as +shell commands or Python functions. + +**Key features:** + +* Combine diverse tasks (`Python functions <./tutorial/3-python.html>`__ or `shell commands <./tutorial/4-shell.html>`__) into coherent, robust `workflows <./tutorial/5-workflow.html>`__ +* Dynamic workflow construction using Python code (see :ref:`Dynamic construction`) +* Concurrent execution on `choice of computing platform (e.g. workstation, SLURM, SGE, Dask, etc...) <./tutorial/3-advanced-execution.html#Workers>`__ +* Map-reduce-like semantics (see :ref:`Splitting and combining`) +* Global caching to reduce recomputation (see :ref:`Caches and hashes`) +* Tasks can be executed in separate software environments, e.g. containers (see :ref:`Software environments`) +* Strong type-checking, including file types, before execution (see :ref:`Typing and file-formats`) + +See :ref:`Design philosophy` for more details on the rationale behind Pydra's design. + + +Installation +------------ + +Pydra is implemented purely in Python and has a small number of dependencies +It is easy to install via pip for Python >= 3.11 (preferably within a +`virtual environment`_): + +.. code-block:: bash + + $ pip install pydra + +Pre-designed tasks are available under the `pydra.tasks.*` namespace. These tasks +are typically implemented within separate packages that are specific to a given +shell-command toolkit, such as FSL_ (*pydra-fsl*), AFNI_ (*pydra-afni*) or +ANTs_ (*pydra-ants*), or a collection of related tasks/workflows, such as Niworkflows +(*pydra-niworkflows*). Pip can be used to install these extension packages as well: + +.. code-block:: bash + + $ pip install pydra-fsl pydra-ants + +Of course, if you use Pydra to execute commands within non-Python toolkits, you will +need to either have those commands installed on the execution machine, or use containers +to run them (see :ref:`Software environments`). + + +Tutorials and notebooks +----------------------- + +The following tutorials provide a step-by-step guide to using Pydra. They can be +studied in any order, but it is recommended to start with :ref:`Getting started` and +step through the list from there. + +The tutorials are written in Jupyter notebooks, which can be downloaded and run locally +or run online using the |Binder| button within each tutorial. + +If you decide to download the notebooks and run locally, be sure to install the necessary +dependencies (ideally within a `virtual environment`_): + +.. 
code-block:: bash + + $ pip install -e /path/to/your/pydra[tutorial] + + +Execution +~~~~~~~~~ + +Learn how to execute existing tasks (including workflows) on different systems + +* :ref:`Getting started` +* :ref:`Advanced execution` +* :ref:`Troubleshooting` + +Design +~~~~~~ + +Learn how to design your own tasks, wrapped shell commands or Python functions, or +workflows, + +* :ref:`Python-tasks` +* :ref:`Shell-tasks` +* :ref:`Workflows` +* :ref:`Canonical task form` + +Examples +~~~~~~~~ + +The following comprehensive examples demonstrate how to use Pydra to build and execute +complex workflows + +* :ref:`T1w MRI preprocessing` +* :ref:`General Linear Model (GLM)` + +How-to Guides +------------- + +The following guides provide step-by-step instructions on how to + +* :ref:`Create a task package` +* :ref:`Port interfaces from Nipype` + +Reference +--------- + +See the full reference documentation for Pydra + +* :ref:`API` +* :ref:`genindex` +* :ref:`modindex` + + +.. toctree:: + :maxdepth: 2 + :caption: Tutorials: Execution + :hidden: + + tutorial/1-getting-started + tutorial/2-advanced-execution + tutorial/3-troubleshooting + +.. toctree:: + :maxdepth: 2 + :caption: Tutorials: Design + :hidden: + + tutorial/4-python + tutorial/5-shell + tutorial/6-workflow + tutorial/7-canonical-form + + +.. toctree:: + :maxdepth: 2 + :caption: Examples + :hidden: + + examples/t1w-preprocess + examples/glm + +.. toctree:: + :maxdepth: 2 + :caption: How-to + :hidden: + + howto/create-task-package + howto/port-from-nipype + +.. toctree:: + :maxdepth: 2 + :caption: Explanation + :hidden: + + explanation/design-approach + explanation/splitting-combining + explanation/conditional-lazy + explanation/environments + explanation/hashing-caching + explanation/typing + + +.. toctree:: + :maxdepth: 2 + :caption: Reference + :hidden: + + reference/api + genindex + modindex + +.. _FSL: https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/FSL +.. _ANTs: http://stnava.github.io/ANTs/ +.. _AFNI: https://afni.nimh.nih.gov/ +.. _niworkflows: https://niworkflows.readthedocs.io/en/latest/ +.. _Nipype: https://nipype.readthedocs.io/en/latest/ +.. _virtual environment: https://docs.python.org/3/library/venv.html +.. |Binder| image:: https://mybinder.org/badge_logo.svg + :target: https://mybinder.org/v2/gh/nipype/pydra/develop diff --git a/new-docs/source/reference/api.rst b/new-docs/source/reference/api.rst new file mode 100644 index 0000000000..8e374efb51 --- /dev/null +++ b/new-docs/source/reference/api.rst @@ -0,0 +1,43 @@ +API +=== + +Python tasks +------------ + +.. automodule:: pydra.design.python + :members: + :undoc-members: + :show-inheritance: + +Shell tasks +----------- + +.. automodule:: pydra.design.shell + :members: + :undoc-members: + :show-inheritance: + +Workflows +--------- + +.. automodule:: pydra.design.workflow + :members: + :undoc-members: + :show-inheritance: + +Specification classes +--------------------- + +.. automodule:: pydra.engine.specs + :members: + :undoc-members: + :show-inheritance: + + +Task classes +------------ + +.. 
automodule:: pydra.engine.task + :members: + :undoc-members: + :show-inheritance: diff --git a/new-docs/source/tutorial/1-getting-started.ipynb b/new-docs/source/tutorial/1-getting-started.ipynb new file mode 100644 index 0000000000..a0d5b1e42b --- /dev/null +++ b/new-docs/source/tutorial/1-getting-started.ipynb @@ -0,0 +1,340 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Getting started\n", + "\n", + "The basic runnable component of Pydra is a *task*. Tasks are conceptually similar to\n", + "functions, in that they take inputs, operate on them and then return results. However,\n", + "unlike functions, tasks are parameterised before they are executed in a separate step.\n", + "This enables parameterised tasks to be linked together into workflows that are checked for\n", + "errors before they are executed, and modular execution workers and environments to specified\n", + "independently of the task being performed.\n", + "\n", + "Tasks can encapsulate Python functions or shell-commands, or be multi-component workflows,\n", + "themselves constructed from task components including nested workflows.\n", + "\n", + "## Preparation\n", + "\n", + "Before we get started, lets set up some test data to play with. Here we create a sample\n", + "JSON file in a temporary directory" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from tempfile import mkdtemp\n", + "from pprint import pprint\n", + "import json\n", + "\n", + "JSON_CONTENTS = {'a': True, 'b': 'two', 'c': 3, 'd': [7, 0.55, 6]}\n", + "\n", + "test_dir = Path(mkdtemp())\n", + "json_file = test_dir / \"test.json\"\n", + "with open(json_file, \"w\") as f:\n", + " json.dump(JSON_CONTENTS, f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next we create a directory containing 10 randomly generated [NIfTI](https://nifti.nimh.nih.gov/) files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from fileformats.medimage import Nifti1\n", + "\n", + "nifti_dir = test_dir / \"nifti\"\n", + "nifti_dir.mkdir()\n", + "\n", + "for i in range(10):\n", + " Nifti1.sample(nifti_dir, seed=i) # Create a dummy NIfTI file in the dest. directory" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that when you run concurrent processes within a Jupyter notebook the following snippet\n", + "is also required" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import nest_asyncio\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Running your first task\n", + "\n", + "Pre-defined task definitions are installed under the `pydra.tasks.*` namespace by separate\n", + "task packages (e.g. `pydra-fsl`, `pydra-ants`, ...). To use a pre-defined task definition\n", + "\n", + "* import the class from the `pydra.tasks.*` package it is in\n", + "* instantiate it with appropriate parameters\n", + "* \"call\" resulting object (i.e. 
`my_task(...)`) to execute it as you would a function \n", + "\n", + "To demonstrate with an example of loading a JSON file with the\n", + "`pydra.tasks.common.LoadJson` task, we first create an example JSON file to test with" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can load the JSON contents back from the file using the `LoadJson` task definition\n", + "class" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import the task definition\n", + "from pydra.tasks.common import LoadJson\n", + "\n", + "# Instantiate the task definition, providing the JSON file we want to load\n", + "load_json = LoadJson(file=json_file)\n", + "\n", + "# Run the task to load the JSON file\n", + "outputs = load_json()\n", + "\n", + "# Access the loaded JSON output contents and check they match original\n", + "assert outputs.out == JSON_CONTENTS" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Iterating over inputs\n", + "\n", + "It is straightforward to apply the same operation over a set of inputs using the `split()`\n", + "method. For example, if we wanted to re-grid all the NIfTI images stored in a directory,\n", + "such as the sample ones generated by the code below" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then we can by importing the `MrGrid` shell-command task from the `pydra-mrtrix3` package\n", + "and run it over every NIfTI file in the directory using the `TaskDef.split()` method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", + "\n", + "# Instantiate the task definition, \"splitting\" over all NIfTI files in the test directory\n", + "# by splitting the \"input\" input field over all files in the directory\n", + "mrgrid = MrGrid(operation=\"regrid\", voxel=(0.5,0.5,0.5)).split(in_file=nifti_dir.iterdir())\n", + "\n", + "# Run the task to resample all NIfTI files\n", + "outputs = mrgrid()\n", + "\n", + "# Print the locations of the output files\n", + "pprint(outputs.out_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It is also possible to iterate over inputs in pairs/n-tuples. For example, if you wanted to use\n", + "different voxel sizes for different images, both the list of images and the voxel sizes\n", + "are passed to the `split()` method and their combination is specified by a tuple \"splitter\"\n", + "\n", + "\n", + "Note that it is important to use a tuple not a list for the splitter definition in this\n", + "case, because a list splitter is interpreted as the split over each combination of inputs\n", + "(see [Splitting and combining](../explanation/splitting-combining.html) for more details\n", + "on splitters)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mrgrid_varying_vox_sizes = MrGrid(operation=\"regrid\").split(\n", + " (\"in_file\", \"voxel\"),\n", + " in_file=nifti_dir.iterdir(),\n", + " # Define a list of voxel sizes to resample the NIfTI files to,\n", + " # the list must be the same length as the list of NIfTI files\n", + " voxel=[\n", + " (1.0, 1.0, 1.0),\n", + " (1.0, 1.0, 1.0),\n", + " (1.0, 1.0, 1.0),\n", + " (0.5, 0.5, 0.5),\n", + " (0.75, 0.75, 0.75),\n", + " (0.5, 0.5, 0.5),\n", + " (0.5, 0.5, 0.5),\n", + " (1.0, 1.0, 1.0),\n", + " (1.25, 1.25, 1.25),\n", + " (1.25, 1.25, 1.25),\n", + " ],\n", + ")\n", + "\n", + "outputs = mrgrid_varying_vox_sizes()\n", + "\n", + "pprint(outputs.out_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Executing tasks in parallel\n", + "\n", + "By default, Pydra will use the *debug* worker, which executes each task sequentially.\n", + "This makes it easier to debug tasks and workflows, however, in most cases, once a workflow\n", + "is tested, a concurrent worker is preferable so tasks can be executed in parallel\n", + "(see [Workers](./3-advanced-execution.html#Workers)). To use multiple processes on a\n", + "workstation, select the `cf` worker option when executing the task/workflow. Additional\n", + "keyword arguments, will be passed to the worker initialisation (e.g. `n_procs=4`).\n", + "\n", + "Note that when multiprocessing in Python on Windows and macOS (and good practice on Linux/POSIX\n", + "OSs for compatibility), you need to place a `if __name__ == \"__main__\"` block when\n", + "executing in top-level scripts to allow the script to be imported, but not executed,\n", + "by subprocesses." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", + "\n", + "if __name__ == \"__main__\": # <-- Add this block to allow the script to imported by subprocesses\n", + " mrgrid = MrGrid(operation=\"regrid\", voxel=(0.5,0.5,0.5)).split(in_file=nifti_dir.iterdir())\n", + " outputs = mrgrid(worker=\"cf\", n_procs=4) # <-- Select the \"cf\" worker here\n", + " print(\"\\n\".join(str(p) for p in outputs.out_file))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## File-system locations\n", + "\n", + "Output and intermediate files are typically generated during the course of a workflow/task run.\n", + "In addition to this, Pydra generates a cache directory for each task, in which\n", + "the task definition, results and any errors are stored in [cloudpickle](https://github.com/cloudpipe/cloudpickle)\n", + "files for future reference (see [Troubleshooting](./troubleshooting.html)).\n", + "By default, these cache directories are stored in a platform-specific application-cache\n", + "directory\n", + "\n", + "* Windows: `C:\\Users\\\\AppData\\Local\\pydra\\\\run-cache`\n", + "* Linux: `/home//.cache/pydra//run-cache`\n", + "* macOS: `/Users//Library/Caches/pydra//run-cache`\n", + "\n", + "When a task runs, a unique hash is generated by the combination of all the inputs to the\n", + "task and the operation to be performed. This hash is used to name the task cache directory\n", + "within the specified cache root. 
Therefore, if you use the same cache\n", + "root and in a subsequent run the same task is executed with the same\n", + "inputs, then the path of its cache directory will be the same, and if Pydra finds\n", + "existing results at that path, then the outputs generated by the previous run will be\n", + "reused.\n", + "\n", + "This cache will grow as more runs are called, therefore care needs to be taken to ensure\n", + "there is enough space on the target disk. Since the cache will be constantly To specify\n", + "a different location for this cache, simply provide the `cache_root` keyword argument to the execution call" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "outputs = mrgrid(cache_dir=Path(\"~/pydra-cache\").expanduser())\n", + "\n", + "pprint(outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To check alternative cache roots, while storing any generated task cache dirs in the \n", + "specified cache root, the `cache_locations` keyword argument can be used" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.utils import default_run_cache_dir\n", + "\n", + "my_cache_dir = Path(\"~/new-pydra-cache\").expanduser()\n", + "my_cache_dir.mkdir(exist_ok=True)\n", + "\n", + "outputs = mrgrid(\n", + " cache_dir=my_cache_dir,\n", + " cache_locations=[default_run_cache_dir]\n", + ")\n", + "\n", + "print(outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/new-docs/source/tutorial/2-advanced-execution.ipynb b/new-docs/source/tutorial/2-advanced-execution.ipynb new file mode 100644 index 0000000000..7765e71069 --- /dev/null +++ b/new-docs/source/tutorial/2-advanced-execution.ipynb @@ -0,0 +1,428 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Advanced execution\n", + "\n", + "One of the key design features of Pydra is the separation between the parameterisation of\n", + "the task to be executed, and the parameresiation of where and how the task should be\n", + "executed (e.g. on the cloud, on a HPC cluster, ...). This tutorial steps you through\n", + "some of the available options for executing a task.\n", + "\n", + "[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nipype/pydra-tutorial/develop/notebooks/tutorial/advanced_execution.ipynb)\n", + "\n", + "Remember that before attempting to run multi-process code in Jupyter notebooks, the\n", + "following snippet must be called" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import nest_asyncio\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submitter\n", + "\n", + "If you want to access a richer `Result` object you can use a Submitter object to initiate\n", + "the task execution. 
For example, using the `TenToThePower` task from the testing package" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.engine.submitter import Submitter\n", + "from pydra.tasks.testing import TenToThePower\n", + "\n", + "\n", + "ten_to_the_power = TenToThePower(p=3)\n", + "\n", + "with Submitter() as submitter:\n", + " result = submitter(ten_to_the_power)\n", + "\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `Result` object contains\n", + "\n", + "* `output`: the outputs of the task (if there is only one output it is called `out` by default)\n", + "* `runtime`: information about the peak memory and CPU usage\n", + "* `errored`: the error status of the task\n", + "* `task`: the task object that generated the results\n", + "* `output_dir`: the output directory the results are stored in" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Workers\n", + "\n", + "Pydra supports several workers with which to execute tasks\n", + "\n", + "- `debug` (default)\n", + "- `cf`\n", + "- `slurm`\n", + "- `sge`\n", + "- `psij`\n", + "- `dask` (experimental)\n", + "\n", + "By default, the *debug* worker is used, which runs tasks serially in a single process\n", + "without use of the `asyncio` module. This makes it easier to debug errors in workflows\n", + "and python tasks, however, when using in Pydra in production you will typically want to\n", + "parallelise the execution for efficiency.\n", + "\n", + "If running on a local workstation, then the `cf` (*ConcurrentFutures*) worker is a good\n", + "option because it is able to spread the tasks to be run over multiple processes and\n", + "maximise CPU usage.\n", + "\n", + "If you have access to a high-performance cluster (HPC) then\n", + "the [SLURM](https://slurm.schedmd.com/documentation.html) and\n", + "[SGE](https://www.metagenomics.wiki/tools/hpc-sge) and [PSI/J](https://exaworks.org/psij)\n", + "workers can be used to submit each workflow node as separate jobs to the HPC scheduler.\n", + "There is also an experimental [Dask](https://www.dask.org/) worker, which provides a\n", + "range of execution backends to choose from.\n", + "\n", + "To specify a worker, the abbreviation can be passed either as a string or using the\n", + "class itself. Additional parameters can be passed to the worker initialisation as keyword\n", + "arguments to the execution call. For example, if we wanted to run five tasks using the\n", + "ConcurentFutures worker but only use three CPUs, we can pass `n_procs=3` to the execution\n", + "call.\n", + "\n", + "Remember that when calling multi-process code in a top level script the call must be\n", + "enclosed within a `if __name__ == \"__main__\"` block to allow the worker processes to\n", + "import the module without re-executing it." 
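+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As an aside, the same worker selection can also be made when constructing a `Submitter`\n", + "directly (the pattern used in the troubleshooting tutorial). The cell below is a minimal,\n", + "illustrative sketch of that pattern: it assumes that worker keyword arguments such as\n", + "`n_procs` are forwarded by the `Submitter` to the worker initialisation in the same way\n", + "as by the execution call, and it inspects the `errored` and `output_dir` fields of the\n", + "`Result` object described above. The example described in the previous paragraph then\n", + "continues in the following cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.engine.submitter import Submitter\n", + "from pydra.tasks.testing import TenToThePower\n", + "\n", + "if __name__ == \"__main__\":\n", + "\n", + "    ten_to_the_power = TenToThePower(p=4)\n", + "\n", + "    # Select the worker on the Submitter rather than in the execution call\n", + "    # (n_procs is assumed to be passed through to the worker initialisation)\n", + "    with Submitter(worker=\"cf\", n_procs=2) as submitter:\n", + "        result = submitter(ten_to_the_power)\n", + "\n", + "    # Fields of the Result object listed in the Submitter section\n", + "    print(result.errored)\n", + "    print(result.output_dir)"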
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import tempfile\n", + "\n", + "cache_root = tempfile.mkdtemp()\n", + "\n", + "if __name__ == \"__main__\":\n", + "\n", + " ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5])\n", + "\n", + " # Run the 5 tasks in parallel split across 3 processes\n", + " outputs = ten_to_the_power(worker=\"cf\", n_procs=3, cache_dir=cache_root)\n", + "\n", + " p1, p2, p3, p4, p5 = outputs.out\n", + "\n", + " print(f\"10^5 = {p5}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, the worker object can be initialised in the calling code and passed directly to the execution call" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.engine.workers import ConcurrentFuturesWorker\n", + "\n", + "ten_to_the_power = TenToThePower().split(p=[6, 7, 8, 9, 10])\n", + "\n", + "# Run the 5 tasks in parallel split across 3 processes\n", + "outputs = ten_to_the_power(worker=ConcurrentFuturesWorker(n_procs=3))\n", + "\n", + "p6, p7, p8, p9, p10 = outputs.out\n", + "\n", + "print(f\"10^10 = {p10}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reusing previously generated results\n", + "\n", + "Pydra caches all task results in the runtime cache (see [File-system locations](./1-getting-started.html##File-system-locations))\n", + "as long as exactly the hashes of the inputs provided to the task are the same. Here we\n", + "go through some of the practicalities of this caching and hashing (see\n", + "[Caches and hashes](../explanation/hashing-caching.html) for more details and issues\n", + "to consider)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we attempt to run the same task with the same parameterisation the cache directory\n", + "will point to the same location and the results will be reused" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from copy import copy\n", + "from pathlib import Path\n", + "import tempfile\n", + "from pprint import pprint\n", + "from fileformats.medimage import Nifti1\n", + "from pydra.engine.submitter import Submitter\n", + "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", + "\n", + "# Make a temporary directory\n", + "test_dir = Path(tempfile.mkdtemp())\n", + "nifti_dir = test_dir / \"nifti\"\n", + "nifti_dir.mkdir()\n", + "\n", + "# Generate some random NIfTI files to work with\n", + "nifti_files = [Nifti1.sample(nifti_dir, seed=i) for i in range(10)]\n", + "\n", + "VOX_SIZES = [\n", + " (0.5, 0.5, 0.5),\n", + " (0.25, 0.25, 0.25),\n", + " (0.1, 0.1, 0.1),\n", + " (0.35, 0.35, 0.35),\n", + " (0.1, 0.1, 0.1),\n", + " (0.5, 0.5, 0.5),\n", + " (0.25, 0.25, 0.25),\n", + " (0.2, 0.2, 0.2),\n", + " (0.35, 0.35, 0.35),\n", + " (0.1, 0.1, 0.1),\n", + " ]\n", + "\n", + "mrgrid_varying_vox = MrGrid(operation=\"regrid\").split(\n", + " (\"in_file\", \"voxel\"),\n", + " in_file=nifti_files,\n", + " voxel=VOX_SIZES,\n", + ")\n", + "\n", + "submitter = Submitter(cache_dir=test_dir / \"cache\")\n", + "\n", + "\n", + "with submitter:\n", + " result1 = submitter(mrgrid_varying_vox)\n", + "\n", + "\n", + "mrgrid_varying_vox2 = MrGrid(operation=\"regrid\").split(\n", + " (\"in_file\", \"voxel\"),\n", + " in_file=nifti_files,\n", + " voxel=copy(VOX_SIZES),\n", + ")\n", + "\n", + "# Result from previous run is reused as the task and inputs are 
identical\n", + "with submitter:\n", + " result2 = submitter(mrgrid_varying_vox2)\n", + "\n", + "# Check that the output directory is the same for both runs\n", + "assert result2.output_dir == result1.output_dir\n", + "\n", + "# Change the voxel sizes to resample the NIfTI files to for one of the files\n", + "mrgrid_varying_vox2.voxel[2] = [0.25]\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "with submitter:\n", + " result3 = submitter(mrgrid_varying_vox2)\n", + "\n", + "# The output directory will be different as the inputs are now different\n", + "assert result3.output_dir != result1.output_dir" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that for file objects, the contents of the files are used to calculate the hash\n", + "not their paths. Therefore, when inputting large files there might be some additional\n", + "overhead on the first run (the file hashes themselves are cached by path and mtime so\n", + "shouldn't need to be recalculated unless they are modified). However, this makes the\n", + "hashes invariant to file-system movement. For example, changing the name of one of the\n", + "files in the nifti directory won't invalidate the hash." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Rename a NIfTI file within the test directory\n", + "nifti_files[0] = Nifti1(\n", + " nifti_files[0].fspath.rename(nifti_files[0].fspath.with_name(\"first.nii\"))\n", + ")\n", + "\n", + "mrgrid_varying_vox3 = MrGrid(operation=\"regrid\").split(\n", + " (\"in_file\", \"voxel\"),\n", + " in_file=nifti_files,\n", + " voxel=VOX_SIZES,\n", + ")\n", + "\n", + "# Result from previous run is reused as contents of the files have not changed, despite\n", + "# the file names changing\n", + "with submitter:\n", + " result4 = submitter(mrgrid_varying_vox3)\n", + "\n", + "assert result4.output_dir == result1.output_dir\n", + "\n", + "# Replace the first NIfTI file with a new file\n", + "nifti_files[0] = Nifti1.sample(nifti_dir, seed=100)\n", + "\n", + "# Update the in_file input field to include the new file\n", + "mrgrid_varying_vox4 = MrGrid(operation=\"regrid\").split(\n", + " (\"in_file\", \"voxel\"),\n", + " in_file=nifti_files,\n", + " voxel=VOX_SIZES,\n", + ")\n", + "\n", + "# The results from the previous runs are ignored as the files have changed\n", + "with submitter:\n", + " result4 = submitter(mrgrid_varying_vox4)\n", + "\n", + "# The cache directory for the new run is different \n", + "assert result4.output_dir != result1.output_dir" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Environments and hooks\n", + "\n", + "For shell tasks, it is possible to specify that the command runs within a specific\n", + "software environment, such as those provided by software containers (e.g. 
Docker or Singularity/Apptainer).\n", + "This is down by providing the environment to the submitter/execution call," + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import tempfile\n", + "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", + "from pydra.engine.environments import Docker\n", + "\n", + "test_dir = tempfile.mkdtemp()\n", + "\n", + "nifti_file = Nifti1.sample(test_dir, seed=0)\n", + "\n", + "# Instantiate the task definition, \"splitting\" over all NIfTI files in the test directory\n", + "# by splitting the \"input\" input field over all files in the directory\n", + "mrgrid = MrGrid(in_file=nifti_file, operation=\"regrid\", voxel=(0.5,0.5,0.5))\n", + "\n", + "# Run the task to resample all NIfTI files\n", + "outputs = mrgrid(environment=Docker(image=\"mrtrix3/mrtrix3\", tag=\"latest\"))\n", + "\n", + "# Print the locations of the output files\n", + "pprint(outputs.out_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Of course for this to work Docker needs to work and be configured for\n", + "[sudo-less execution](https://docs.docker.com/engine/install/linux-postinstall/).\n", + "See [Containers and Environments](../explanation/environments.rst) for more details on\n", + "how to utilise containers and add support for other software environments.\n", + "\n", + "It is also possible to specify functions to run at hooks that are immediately before and after\n", + "the task is executed by passing a `pydra.engine.spec.TaskHooks` object to the `hooks`\n", + "keyword arg. The callable should take the `pydra.engine.core.Task` object as its only\n", + "argument and return None. The available hooks to attach functions are:\n", + "\n", + "* pre_run: before the task cache directory is created\n", + "* pre_run_task: after the cache directory has been created and the inputs resolved but before the task is executed\n", + "* post_run_task: after the task has been run and the outputs collected\n", + "* post_run: after the cache directory has been finalised\n", + "\n", + "\n", + "QUESTION: What are these hooks intended for? Should the post_run_task hook be run before the outputs have been\n", + "collected?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.engine.core import Task\n", + "from pydra.engine.specs import TaskHooks, Result\n", + "import os\n", + "import platform\n", + "\n", + "def notify_task_completion(task: Task, result: Result):\n", + " # Print a message to the terminal\n", + " print(f\"Task completed! 
Results are stored in {str(task.output_dir)!r}\")\n", + "\n", + " # Platform-specific notifications\n", + " if platform.system() == \"Darwin\": # macOS\n", + " os.system('osascript -e \\'display notification \"Task has completed successfully!\" with title \"Task Notification\"\\'')\n", + " elif platform.system() == \"Linux\": # Linux\n", + " os.system('notify-send \"Task Notification\" \"Task has completed successfully!\"')\n", + " elif platform.system() == \"Windows\": # Windows\n", + " os.system('msg * \"Task has completed successfully!\"')\n", + "\n", + "# Run the task to resample all NIfTI files\n", + "outputs = mrgrid(hooks=TaskHooks(post_run=notify_task_completion), cache_dir=tempfile.mkdtemp())\n", + "\n", + "# Print the locations of the output files\n", + "pprint(outputs.out_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Provenance and auditing\n", + "\n", + "Work in progress..." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/new-docs/source/tutorial/3-troubleshooting.ipynb b/new-docs/source/tutorial/3-troubleshooting.ipynb new file mode 100644 index 0000000000..2930c33d8c --- /dev/null +++ b/new-docs/source/tutorial/3-troubleshooting.ipynb @@ -0,0 +1,259 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Troubleshooting\n", + "\n", + "This tutorial steps through tecnhiques to identify errors and pipeline failures, as well\n", + "as avoid common pitfalls setting up executing over multiple processes." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Things to check if Pydra gets stuck\n", + "\n", + "I There are a number of common gotchas, related to running multi-process code, that can\n", + "cause Pydra workflows to get stuck and not execute correctly. If using the concurrent\n", + "futures worker (e.g. `worker=\"cf\"`), check these issues first before filing a bug report\n", + "or reaching out for help.\n", + "\n", + "### Applying `nest_asyncio` when running within a notebook\n", + "\n", + "When using the concurrent futures worker within a Jupyter notebook you need to apply\n", + "`nest_asyncio` with the following lines" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# This is needed to run parallel workflows in Jupyter notebooks\n", + "import nest_asyncio\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Enclosing multi-process code within `if __name__ == \"__main__\"`\n", + "\n", + "When running multi-process Python code on macOS or Windows, as is the case when the \n", + "concurrent futures worker is selected (i.e. `worker=\"cf\"`), then scripts that execute\n", + "the forking code need to be enclosed within an `if __name__ == \"__main__\"` block, e.g." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.tasks.testing import UnsafeDivisionWorkflow\n", + "from pydra.engine.submitter import Submitter\n", + "\n", + "# This workflow will fail because we are trying to divide by 0\n", + "wf = UnsafeDivisionWorkflow(a=10, b=5, denominator=2)\n", + "\n", + "if __name__ == \"__main__\":\n", + " with Submitter(worker=\"cf\") as sub:\n", + " result = sub(wf)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This allows the secondary processes to import the script without executing it. Without\n", + "such a block Pydra will lock up and not process the workflow. On Linux this is not an\n", + "issue due to the way that processes are forked, but is good practice in any case for\n", + "code portability." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Removing stray lockfiles\n", + "\n", + "When a Pydra task is executed, a lockfile is generated to signify that the task is running.\n", + "Other processes will wait for this lock to be released before attempting to access the\n", + "tasks results. The lockfiles are automatically deleted after a task completes, either\n", + "successfully or with an error, within a *try/finally* block so should run most of the time.\n", + "However, if a task/workflow is terminated by an interactive\n", + "debugger, the finally block may not be executed, leaving stray lockfiles. This\n", + "can cause the Pydra to hang waiting for the lock to be released. If you suspect this to be\n", + "an issue, and there are no other jobs running, then simply remove all lock files from your\n", + "cache directory (e.g. `rm /*.lock`) and re-submit your job.\n", + "\n", + "If the `clean_stale_locks` flag is set (by default when using the *debug* worker), locks that\n", + "were created before the outer task was submitted are removed before the task is run.\n", + "However, since these locks could be created by separate submission processes, ``clean_stale_locks`\n", + "is not switched on by default when using production workers (e.g. `cf`, `slurm`, etc...)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inspecting errors\n", + "\n", + "### Running in *debug* mode\n", + "\n", + "By default, Pydra will run with the *debug* worker, which executes each task serially\n", + "within a single process without use of `async/await` blocks, to allow raised exceptions\n", + "to propagate gracefully to the calling code. If you are having trouble with a pipeline,\n", + "ensure that `worker=debug` is passed to the submission/execution call (the default).\n", + "\n", + "### Reading error files\n", + "\n", + "When a task raises an error, it is captured and saved in pickle file named `_error.pklz`\n", + "within task's cache directory. For example, when calling the toy `UnsafeDivisionWorkflow`\n", + "with a `denominator=0`, the task will fail." 
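+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you are not sure which task produced an error, the error files can also be located by\n", + "searching the cache root directly. The cell below is a minimal sketch of this, assuming the\n", + "default run-cache layout used elsewhere in these tutorials (one sub-directory per task, each\n", + "containing an `_error.pklz` file if the task errored). The example described above, which\n", + "deliberately triggers such a failure, then follows." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.utils import default_run_cache_dir\n", + "\n", + "# List any task cache directories in the default run cache that recorded an error\n", + "for error_file in default_run_cache_dir.glob(\"*/_error.pklz\"):\n", + "    print(f\"Error recorded in: {error_file.parent.name}\")"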
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# This workflow will fail because we are trying to divide by 0\n", + "wf = UnsafeDivisionWorkflow(a=10, b=5).split(denominator=[3, 2, 0])\n", + "\n", + "if __name__ == \"__main__\":\n", + " try:\n", + " with Submitter(worker=\"cf\") as sub:\n", + " result = sub(wf)\n", + " except Exception as e:\n", + " print(e)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The error pickle files can be loaded using the `cloudpickle` library, noting that it is\n", + "important to load the files with the same Python version that was used to run the Pydra\n", + "workflow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.utils import default_run_cache_dir\n", + "import cloudpickle as cp\n", + "from pprint import pprint\n", + "from pydra.tasks.testing import Divide\n", + "\n", + "with open(default_run_cache_dir / Divide(x=15, y=0)._checksum / \"_error.pklz\", \"rb\") as f:\n", + " error = cp.load(f)\n", + "\n", + "pprint(error)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tracing upstream issues\n", + "\n", + "Failures are common in scientific analysis, even for well-tested workflows, due to\n", + "the novel nature of scientific experiments and known artefacts that can occur.\n", + "Therefore, it is always worth sanity-checking the results produced by workflows. When a problem\n", + "occurs in a multi-stage workflow it can be difficult to identify at which stage the\n", + "issue occurred.\n", + "\n", + "Currently in Pydra you need to step backwards through the tasks of the workflow, load\n", + "the saved task object and inspect its inputs to find the preceding nodes. If any of the\n", + "inputs that have been generated by previous nodes are not as expected, then you should check the\n", + "tasks that generated them in turn. For file-based inputs, you should be able to find\n", + "the path of the preceding task's cache directory from the provided file path. However,\n", + "for non-file inputs you may need to exhaustively iterate through all the task dirs\n", + "in your cache root to find the issue.\n", + "\n", + "For example, in the following workflow, if a divide by 0 occurs within the division\n", + "node of the workflow, then a `float('inf')` will be returned, which will then propagate\n", + "through the workflow."
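+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before running that example, the \"load the saved task object\" step described above can be\n", + "sketched as follows. This is purely illustrative: it assumes the `_result.pklz` file name and\n", + "run-cache layout used in the search cell further below, and the `task` attribute listed in the\n", + "`Result` description of the Advanced execution tutorial." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import cloudpickle as cp\n", + "from pydra.utils import user_cache_dir\n", + "\n", + "run_cache = user_cache_dir / \"run-cache\"\n", + "\n", + "# Load the saved result of an arbitrary task and inspect the task object that produced it\n", + "task_cache_dir = next(run_cache.iterdir())\n", + "with open(task_cache_dir / \"_result.pklz\", \"rb\") as f:\n", + "    result = cp.load(f)\n", + "\n", + "# The task object (and therefore its resolved inputs) is stored alongside the outputs\n", + "print(result.task)"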
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.engine.submitter import Submitter\n", + "from pydra.tasks.testing import SafeDivisionWorkflow\n", + "\n", + "wf = SafeDivisionWorkflow(a=10, b=5).split(denominator=[3, 2, 0])\n", + "\n", + "if __name__ == \"__main__\":\n", + " with Submitter(worker=\"cf\") as sub:\n", + " result = sub(wf)\n", + " \n", + "print(f\"Workflow completed successfully, results saved in: {result.output_dir}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To find the task directory where the issue first surfaced, iterate through every task\n", + "cache dir and check the results for `float(\"inf\")`s" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import cloudpickle as cp\n", + "from pydra.utils import user_cache_dir\n", + "\n", + "run_cache = user_cache_dir / \"run-cache\"\n", + "\n", + "for task_cache_dir in run_cache.iterdir():\n", + " with open(task_cache_dir / \"_result.pklz\", \"rb\") as f:\n", + " result = cp.load(f)\n", + " if result.outputs is not None:\n", + " for field_name in result.outputs:\n", + " if result.outputs[field_name] == float('inf'):\n", + " print(f\"Task {task_cache_dir.name!r} produced an inf value for {field_name!r}\")\n", + " " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/new-docs/source/tutorial/4-python.ipynb b/new-docs/source/tutorial/4-python.ipynb new file mode 100644 index 0000000000..a09a3d630c --- /dev/null +++ b/new-docs/source/tutorial/4-python.ipynb @@ -0,0 +1,260 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python-tasks\n", + "\n", + "Python task definitions are Python functions that are parameterised in a separate step before\n", + "they are executed or added to a workflow.\n", + "\n", + "## Define decorator\n", + "\n", + "The simplest way to define a Python task is to decorate a function with `pydra.design.python.define`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import python\n", + "\n", + "# Note that we use CamelCase, as the decorator returns a class rather than a function\n", + "@python.define\n", + "def MyFirstTaskDef(a, b):\n", + " \"\"\"Sample function for testing\"\"\"\n", + " return a + b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The resulting task-definition class can then be parameterised (instantiated), and\n", + "executed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Instantiate the task, setting all parameters\n", + "my_first_task = MyFirstTaskDef(a=1, b=2.0)\n", + "\n", + "# Execute the task\n", + "outputs = my_first_task()\n", + "\n", + "print(outputs.out)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By default, the name of the output field for a function with only one output is `out`. 
To\n", + "name this something else, or in the case where there are multiple output fields, the `outputs`\n", + "argument can be provided to `python.define`\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@python.define(outputs=[\"c\", \"d\"])\n", + "def NamedOutputTaskDef(a, b):\n", + " \"\"\"Sample function for testing\"\"\"\n", + " return a + b, a - b\n", + "\n", + "named_output_task = NamedOutputTaskDef(a=2, b=1)\n", + "\n", + "outputs = named_output_task()\n", + "\n", + "print(outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The input and output field attributes automatically extracted from the function, explicit\n", + "attributes can be augmented" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@python.define(\n", + " inputs={\"a\": python.arg(allowed_values=[1, 2, 3]), \"b\": python.arg(default=10.0)},\n", + " outputs={\n", + " \"c\": python.out(type=float, help=\"the sum of the inputs\"),\n", + " \"d\": python.out(type=float, help=\"the difference of the inputs\"),\n", + " },\n", + ")\n", + "def AugmentedTaskDef(a, b):\n", + " \"\"\"Sample function for testing\"\"\"\n", + " return a + b, a - b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Type annotations\n", + "\n", + "If provided, type annotations are included in the task definition, and are checked at\n", + "the time of parameterisation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import python\n", + "\n", + "# Note that we use CamelCase as the function is translated to a class\n", + "\n", + "@python.define\n", + "def MyTypedTask(a: int, b: float) -> float:\n", + " \"\"\"Sample function for testing\"\"\"\n", + " return a + b\n", + "\n", + "try:\n", + " # 1.5 is not an integer so this should raise a TypeError\n", + " my_typed_task = MyTypedTask(a=1.5, b=2.0)\n", + "except TypeError as e:\n", + " print(f\"Type error caught: {e}\")\n", + "else:\n", + " assert False, \"Expected a TypeError\"\n", + "\n", + "# While 2 is an integer, it can be implicitly coerced to a float\n", + "my_typed_task = MyTypedTask(a=1, b=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Docstring parsing\n", + "\n", + "Instead of explicitly providing help strings and output names in `inputs` and `outputs`\n", + "arguments, if the function describes the its inputs and/or outputs in the doc string, \n", + "in either reST, Google or NumpyDoc style, then they will be extracted and included in the\n", + "input or output fields\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pprint import pprint\n", + "from pydra.engine.helpers import fields_dict\n", + "\n", + "@python.define\n", + "def DocStrDef(a: int, b: float) -> tuple[float, float]:\n", + " \"\"\"Sample function for testing\n", + "\n", + " Args:\n", + " a: First input\n", + " to be inputted\n", + " b: Second input\n", + "\n", + " Returns:\n", + " c: Sum of a and b\n", + " d: Product of a and b\n", + " \"\"\"\n", + " return a + b, a * b\n", + "\n", + "pprint(fields_dict(DocStrDef))\n", + "pprint(fields_dict(DocStrDef.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Wrapping external functions\n", + "\n", + "Like all decorators, `python.define` is just a function, so can 
also be used to convert\n", + "a function that is defined separately into a Python task definition." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "NumpyCorrelate = python.define(np.correlate)\n", + "\n", + "numpy_correlate = NumpyCorrelate(a=[1, 2, 3], v=[0, 1, 0.5])\n", + "\n", + "outputs = numpy_correlate()\n", + "\n", + "print(outputs.out)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Like with decorated functions, input and output fields can be explicitly augmented via\n", + "the `inputs` and `outputs` arguments" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "NumpyCorrelate = python.define(np.correlate, outputs=[\"correlation\"])\n", + "\n", + "numpy_correlate = NumpyCorrelate(a=[1, 2, 3], v=[0, 1, 0.5])\n", + "\n", + "outputs = numpy_correlate()\n", + "\n", + "print(outputs.correlation)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/new-docs/source/tutorial/5-shell.ipynb b/new-docs/source/tutorial/5-shell.ipynb new file mode 100644 index 0000000000..5ceb8ced72 --- /dev/null +++ b/new-docs/source/tutorial/5-shell.ipynb @@ -0,0 +1,442 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Shell-tasks" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Command-line templates\n", + "\n", + "Shell task specs can be defined using from string templates that resemble the command-line usage examples typically used in in-line help. Therefore, they can be quick and intuitive way to specify a shell task. For example, a simple spec for the copy command `cp` that omits optional flags," + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import shell\n", + "\n", + "Cp = shell.define(\"cp \")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Input and output fields are both specified by placing the name of the field within enclosing `<` and `>`. 
Outputs are differentiated by the `out|` prefix.\n", + "\n", + "This shell task can then be run just as a Python task would be run, first parameterising it, then executing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from tempfile import mkdtemp\n", + "\n", + "# Make a test file to copy\n", + "test_dir = Path(mkdtemp())\n", + "test_file = test_dir / \"in.txt\"\n", + "with open(test_file, \"w\") as f:\n", + " f.write(\"Contents to be copied\")\n", + "\n", + "# Parameterise the task definition\n", + "cp = Cp(in_file=test_file, destination=test_dir / \"out.txt\")\n", + "\n", + "# Print the cmdline to be run to double check\n", + "print(f\"Command-line to be run: {cp.cmdline}\")\n", + "\n", + "# Run the shell-command task\n", + "outputs = cp()\n", + "\n", + "print(\n", + " f\"Contents of copied file ('{outputs.destination}'): \"\n", + " f\"'{Path(outputs.destination).read_text()}'\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If paths to output files are not provided in the parameterisation, they will default to the name of the field" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cp = Cp(in_file=test_file)\n", + "print(cp.cmdline)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Specifying types\n", + "\n", + "By default, shell-command fields are considered to be of `fileformats.generic.FsObject` type. However, more specific file formats or built-in Python types can be specified by appending the type to the field name after a `:`.\n", + "\n", + "File formats are specified by their MIME type or \"MIME-like\" strings (see the [FileFormats docs](https://arcanaframework.github.io/fileformats/mime.html) for details)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from fileformats.image import Png\n", + "\n", + "TrimPng = shell.define(\"trim-png \")\n", + "\n", + "trim_png = TrimPng(in_image=Png.mock(), out_image=\"/path/to/output.png\")\n", + "\n", + "print(trim_png.cmdline)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Flags and options\n", + "\n", + "Command line flags can also be added to the shell template, in either single- or double-hyphen form.\n", + "The field template name immediately following the flag will be associated with that flag.\n", + "If there is no space between the flag and the field template, then the field is assumed\n", + "to be a boolean, otherwise it is assumed to be of type string unless otherwise specified.\n", + "\n", + "If a field is optional, the field template should end with a `?`. Tuple fields are\n", + "specified by comma-separated types. The ellipsis (`...`) can signify tuple types with\n", + "a variable number of items. Arguments and options that can be repeated are specified by\n", + "appending a `+` (at least one must be provided) or `*` (defaults to empty list). Note that\n", + "for options, this signifies that the flag itself is printed multiple times. e.g.\n", + "`my-command --multi-opt 1 2 --multi-opt 1 5`."
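+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To illustrate these rules, the cell below sketches a purely hypothetical `my-cmd` template that\n", + "combines a repeated positional argument, a boolean flag, an optional typed option and an\n", + "optional tuple option. The command and field names are invented for illustration; only the\n", + "placeholder conventions described in this section are assumed." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pprint import pprint\n", + "from pydra.design import shell\n", + "from pydra.engine.helpers import fields_dict\n", + "\n", + "# Hypothetical command, used only to exercise the template syntax described above\n", + "MyCmd = shell.define(\n", + "    \"my-cmd <in_files+> <out|out_file> \"\n", + "    \"-v<verbose> \"\n", + "    \"--level <level:int?> \"\n", + "    \"--scale <scale:float,float?>\"\n", + ")\n", + "\n", + "pprint(fields_dict(MyCmd))\n", + "pprint(fields_dict(MyCmd.Outputs))"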
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pprint import pprint\n", + "from pydra.engine.helpers import fields_dict\n", + "\n", + "Cp = shell.define(\n", + " \"cp \"\n", + " \"-R \"\n", + " \"--text-arg \"\n", + " \"--int-arg \"\n", + " \"--tuple-arg \"\n", + ")\n", + "\n", + "pprint(fields_dict(Cp))\n", + "pprint(fields_dict(Cp.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Defaults\n", + "\n", + "Defaults can be specified by appending them to the field template after `=`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Cp = shell.define(\n", + " \"cp \"\n", + " \"-R \"\n", + " \"--text-arg \"\n", + " \"--int-arg \"\n", + " \"--tuple-arg \"\n", + ")\n", + "\n", + "print(f\"'--int-arg' default: {fields_dict(Cp)['int_arg'].default}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Path templates for output files\n", + "\n", + "By default, when an output file argument is defined, a `path_template` attribute will\n", + "be assigned to the field based on its name and extension (if applicable). For example,\n", + "the `zipped` output field in the following Gzip command will be assigned a\n", + "`path_template` of `out_file.gz`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import shell\n", + "from fileformats.generic import File\n", + "\n", + "Gzip = shell.define(\"gzip \")\n", + "gzip = Gzip(in_files=File.mock(\"/a/file.txt\"))\n", + "print(gzip.cmdline)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "However, if this needs to be specified it can be by using the `$` operator, e.g." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Gzip = shell.define(\"gzip \")\n", + "gzip = Gzip(in_files=File.mock(\"/a/file.txt\"))\n", + "print(gzip.cmdline)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To give the field a path_template of `archive.gz` when it is written on the command line.\n", + "Note that this value can always be overridden when the task is initialised, e.g." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gzip = Gzip(in_files=File.mock(\"/a/file.txt\"), out_file=\"/path/to/archive.gz\")\n", + "print(gzip.cmdline)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Additional field attributes\n", + "\n", + "Additional attributes of the fields in the template can be specified by providing `shell.arg` or `shell.outarg` fields to the `inputs` and `outputs` keyword arguments to the define" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Cp = shell.define(\n", + " (\n", + " \"cp \"\n", + " \"-R \"\n", + " \"--text-arg \"\n", + " \"--int-arg \"\n", + " \"--tuple-arg \"\n", + " ),\n", + " inputs={\"recursive\": shell.arg(\n", + " help=(\n", + " \"If source_file designates a directory, cp copies the directory and \"\n", + " \"the entire subtree connected at that point.\"\n", + " )\n", + " )},\n", + " outputs={\n", + " \"out_dir\": shell.outarg(position=-2),\n", + " \"out_file\": shell.outarg(position=-1),\n", + " },\n", + ")\n", + "\n", + "\n", + "pprint(fields_dict(Cp))\n", + "pprint(fields_dict(Cp.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Callable outptus\n", + "\n", + "In addition to outputs that are specified to the tool on the command line, outputs can be derived from the outputs of the tool by providing a Python function that can take the output directory and inputs as arguments and return the output value. Callables can be either specified in the `callable` attribute of the `shell.out` field, or in a dictionary mapping the output name to the callable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from pydra.design import shell\n", + "from pathlib import Path\n", + "from fileformats.generic import File\n", + "\n", + "# Arguments to the callable function can be one of \n", + "def get_file_size(out_file: Path) -> int:\n", + " \"\"\"Calculate the file size\"\"\"\n", + " result = os.stat(out_file)\n", + " return result.st_size\n", + "\n", + "\n", + "CpWithSize = shell.define(\n", + " \"cp \",\n", + " outputs={\"out_file_size\": get_file_size},\n", + ")\n", + "\n", + "# Parameterise the task definition\n", + "cp_with_size = CpWithSize(in_file=File.sample())\n", + "\n", + "# Run the command\n", + "outputs = cp_with_size()\n", + "\n", + "\n", + "print(f\"Size of the output file is: {outputs.out_file_size}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The callable can take any combination of the following arguments, which will be passed\n", + "to it when it is called\n", + "\n", + "* field: the `Field` object to be provided a value, useful when writing generic callables\n", + "* output_dir: a `Path` object referencing the working directory the command was run within\n", + "* inputs: a dictionary containing all the resolved inputs to the task\n", + "* stdout: the standard output stream produced by the command\n", + "* stderr: the standard error stream produced by the command\n", + "* *name of an input*: the name of any of the input arguments to the task, including output args that are part of the command line (i.e. 
output files)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To make workflows that use the interface type-checkable, the canonical form of a shell\n", + "task dataclass should inherit from `shell.Def` parameterized by its nested Outputs class,\n", + "and the `Outputs` nested class should inherit from `shell.Outputs`. Arguments that are\n", + "provided None values are not included in the command line, so optional arguments should\n", + "be typed as one of these equivalent forms `ty.Union[T, None]`, `ty.Optional[T]` or `T | None`\n", + "and have a default of `None`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.engine.specs import ShellDef, ShellOutputs\n", + "from pydra.utils.typing import MultiInputObj\n", + "from fileformats.generic import FsObject, Directory\n", + "\n", + "@shell.define\n", + "class Cp(ShellDef[\"Cp.Outputs\"]):\n", + "\n", + " executable = \"cp\"\n", + "\n", + " in_fs_objects: MultiInputObj[FsObject]\n", + " recursive: bool = shell.arg(argstr=\"-R\", default=False)\n", + " text_arg: str = shell.arg(argstr=\"--text-arg\")\n", + " int_arg: int | None = shell.arg(argstr=\"--int-arg\", default=None)\n", + " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\", default=None)\n", + "\n", + " @shell.outputs\n", + " class Outputs(ShellOutputs):\n", + " out_dir: Directory = shell.outarg(path_template=\"{out_dir}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dynamic definitions\n", + "\n", + "In some cases, it is required to generate the definition for a task dynamically, which can be done by just providing the executable to `shell.define` and specifying all inputs and outputs explicitly" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from fileformats.generic import File\n", + "from pydra.engine.helpers import list_fields\n", + "\n", + "ACommand = shell.define(\n", + " \"a-command\",\n", + " inputs={\n", + " \"in_file\": shell.arg(type=File, help=\"output file\", argstr=\"\", position=-2)\n", + " },\n", + " outputs={\n", + " \"out_file\": shell.outarg(\n", + " type=File, help=\"output file\", argstr=\"\", position=-1\n", + " ),\n", + " \"out_file_size\": {\n", + " \"type\": int,\n", + " \"help\": \"size of the output directory\",\n", + " \"callable\": get_file_size,\n", + " }\n", + " },\n", + ")\n", + "\n", + "\n", + "print(f\"ACommand input fields: {list_fields(ACommand)}\")\n", + "print(f\"ACommand input fields: {list_fields(ACommand.Outputs)}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/new-docs/source/tutorial/6-workflow.ipynb b/new-docs/source/tutorial/6-workflow.ipynb new file mode 100644 index 0000000000..ecb8cd9d87 --- /dev/null +++ b/new-docs/source/tutorial/6-workflow.ipynb @@ -0,0 +1,556 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Workflows\n", + "\n", + "In Pydra, workflows are DAG of component tasks to be executed on specified inputs.\n", 
+ "Workflow definitions are dataclasses, which interchangeable with Python and shell tasks\n", + "definitions and executed in the same way." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constructor functions\n", + "\n", + "Workflows are typically defined using the `pydra.design.workflow.define` decorator on \n", + "a \"constructor\" function that generates the workflow. For example, given two task\n", + "definitions, `Add` and `Mul`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import workflow, python\n", + "\n", + "# Example python task definitions\n", + "@python.define\n", + "def Add(a, b):\n", + " return a + b\n", + "\n", + "\n", + "@python.define\n", + "def Mul(a, b):\n", + " return a * b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " we can create a simple workflow definition using `workflow.define` to decorate a function that constructs the workflow. Nodes are added to the workflow being constructed by calling `workflow.add` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@workflow.define\n", + "def BasicWorkflow(a, b):\n", + " add = workflow.add(Add(a=a, b=b))\n", + " mul = workflow.add(Mul(a=add.out, b=b))\n", + " return mul.out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`workflow.add` returns an \"outputs\" object corresponding to the definition added to the\n", + "workflow. The fields of the outptus object can be referenced as inputs to downstream\n", + "workflow nodes. Note that these output fields are just placeholders for the values that will\n", + "be returned and can't be used in conditional statements during workflow construction\n", + "(see [Dynamic construction](../explanation/conditional-lazy.html) on how to work around this\n", + "limitation). The fields of the outputs to be returned by the workflow should be returned\n", + "in a tuple." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import shell\n", + "from fileformats import image, video\n", + "\n", + "@workflow.define\n", + "def ShellWorkflow(\n", + " input_video: video.Mp4,\n", + " watermark: image.Png,\n", + " watermark_dims: tuple[int, int] = (10, 10),\n", + ") -> video.Mp4:\n", + "\n", + " add_watermark = workflow.add(\n", + " shell.define(\n", + " \"ffmpeg -i -i \"\n", + " \"-filter_complex \"\n", + " )(\n", + " in_video=input_video,\n", + " watermark=watermark,\n", + " filter=\"overlay={}:{}\".format(*watermark_dims),\n", + " )\n", + " )\n", + " output_video = workflow.add(\n", + " shell.define(\n", + " \"HandBrakeCLI -i -o \"\n", + " \"--width --height \",\n", + " )(in_video=add_watermark.out_video, width=1280, height=720)\n", + " ).out_video\n", + "\n", + " return output_video # test implicit detection of output name" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Splitting/combining task inputs\n", + "\n", + "Sometimes, you might want to perform the same task over a set of input values/files, and then collect the results into a list to perform further processing. 
This can be achieved by using the `split` and `combine` methods" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@python.define\n", + "def Sum(x: list[float]) -> float:\n", + " return sum(x)\n", + "\n", + "@workflow.define\n", + "def SplitWorkflow(a: list[int], b: list[float]) -> list[float]:\n", + " # Multiply over all combinations of the elements of a and b, then combine the results\n", + " # for each a element into a list over each b element\n", + " mul = workflow.add(Mul().split(x=a, y=b).combine(\"x\"))\n", + " # Sume the multiplications across all all b elements for each a element\n", + " sum = workflow.add(Sum(x=mul.out))\n", + " return sum.out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The combination step doesn't have to be done on the same step as the split, in which case the splits propagate to downstream nodes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@workflow.define\n", + "def SplitThenCombineWorkflow(a: list[int], b: list[float], c: float) -> list[float]:\n", + " mul = workflow.add(Mul().split(x=a, y=b))\n", + " add = workflow.add(Add(x=mul.out, y=c).combine(\"Mul.x\"))\n", + " sum = workflow.add(Sum(x=add.out))\n", + " return sum.out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For more advanced discussion on the intricacies of splitting and combining see [Splitting and combining](../explanation/splitting-combining.html)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Nested and conditional workflows\n", + "\n", + "One of the most powerful features of Pydra is the ability to use inline Python code to conditionally add/omit nodes to workflow, and alter the parameterisation of the nodes, depending on inputs to the workflow " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@workflow.define\n", + "def ConditionalWorkflow(\n", + " input_video: video.Mp4,\n", + " watermark: image.Png,\n", + " watermark_dims: tuple[int, int] | None = None,\n", + ") -> video.Mp4:\n", + "\n", + " if watermark_dims is not None:\n", + " add_watermark = workflow.add(\n", + " shell.define(\n", + " \"ffmpeg -i -i \"\n", + " \"-filter_complex \"\n", + " )(\n", + " in_video=input_video,\n", + " watermark=watermark,\n", + " filter=\"overlay={}:{}\".format(*watermark_dims),\n", + " )\n", + " )\n", + " handbrake_input = add_watermark.out_video\n", + " else:\n", + " handbrake_input = input_video\n", + "\n", + " output_video = workflow.add(\n", + " shell.define(\n", + " \"HandBrakeCLI -i -o \"\n", + " \"--width --height \",\n", + " )(in_video=handbrake_input, width=1280, height=720)\n", + " ).out_video\n", + "\n", + " return output_video # test implicit detection of output name" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that outputs of upstream nodes cannot be used in conditional statements, since these are just placeholders at the time the workflow is being constructed. 
However, you can get around\n", + "this limitation by placing the conditional logic within a nested workflow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@python.define\n", + "def Subtract(x: float, y: float) -> float:\n", + " return x - y\n", + "\n", + "@workflow.define\n", + "def RecursiveNestedWorkflow(a: float, depth: int) -> float:\n", + " add = workflow.add(Add(x=a, y=1))\n", + " decrement_depth = workflow.add(Subtract(x=depth, y=1))\n", + " if depth > 0:\n", + " out_node = workflow.add(\n", + " RecursiveNestedWorkflow(a=add.out, depth=decrement_depth.out)\n", + " )\n", + " else:\n", + " out_node = add\n", + " return out_node.out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For more detailed discussion of the construction of conditional workflows and \"lazy field\"\n", + "placeholders see [Conditionals and lazy fields](../explanation/conditional-lazy.html)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Type-checking between nodes\n", + "\n", + "Pydra utilizes Python type annotations to implement strong type-checking, which is performed\n", + "when values or upstream outputs are assigned to task definition inputs.\n", + "\n", + "Task input and output fields do not need to be assigned types, since they will default to `typing.Any`.\n", + "However, if they are assigned a type and a value or output from an upstream node conflicts\n", + "with the type, a `TypeError` will be raised at construction time.\n", + "\n", + "Note that the type-checking \"assumes the best\", and will pass if the upstream field is typed\n", + "by `Any` or a super-class of the field being assigned to. For example, an input of\n", + "`fileformats.generic.File` passed to a field expecting a `fileformats.image.Png` file type,\n", + "because `Png` is a subtype of `File`, where as `fileformats.image.Jpeg` input would fail\n", + "since it is clearly not the intended type.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from fileformats import generic\n", + "\n", + "Mp4Handbrake = shell.define(\n", + " \"HandBrakeCLI -i -o \"\n", + " \"--width --height \",\n", + ")\n", + "\n", + "\n", + "QuicktimeHandbrake = shell.define(\n", + " \"HandBrakeCLI -i -o \"\n", + " \"--width --height \",\n", + ")\n", + "\n", + "@workflow.define\n", + "def TypeErrorWorkflow(\n", + " input_video: video.Mp4,\n", + " watermark: generic.File,\n", + " watermark_dims: tuple[int, int] = (10, 10),\n", + ") -> video.Mp4:\n", + "\n", + " add_watermark = workflow.add(\n", + " shell.define(\n", + " \"ffmpeg -i -i \"\n", + " \"-filter_complex \"\n", + " )(\n", + " in_video=input_video, # This is OK because in_video is typed Any\n", + " watermark=watermark, # Type is OK because generic.File is superclass of image.Png\n", + " filter=\"overlay={}:{}\".format(*watermark_dims),\n", + " ),\n", + " name=\"add_watermark\",\n", + " )\n", + "\n", + " try:\n", + " handbrake = workflow.add(\n", + " QuicktimeHandbrake(in_video=add_watermark.out_video, width=1280, height=720),\n", + " ) # This will raise a TypeError because the input video is an Mp4\n", + " except TypeError:\n", + " handbrake = workflow.add(\n", + " Mp4Handbrake(in_video=add_watermark.out_video, width=1280, height=720),\n", + " ) # The type of the input video is now correct\n", + "\n", + " return handbrake.output_video" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For more 
detailed discussion on Pydra's type-checking see [Type Checking](../explanation/typing.html)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Accessing the workflow object\n", + "\n", + "If you need to access the workflow object being constructed from inside the constructor function you can use `workflow.this()`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@python.define(outputs=[\"divided\"])\n", + "def Divide(x, y):\n", + " return x / y\n", + "\n", + "\n", + "@workflow.define(outputs=[\"out1\", \"out2\"])\n", + "def DirectAccesWorkflow(a: int, b: float) -> tuple[float, float]:\n", + " \"\"\"A test workflow demonstration a few alternative ways to set and connect nodes\n", + "\n", + " Args:\n", + " a: An integer input\n", + " b: A float input\n", + "\n", + " Returns:\n", + " out1: The first output\n", + " out2: The second output\n", + " \"\"\"\n", + "\n", + " wf = workflow.this()\n", + "\n", + " add = wf.add(Add(x=a, y=b), name=\"addition\")\n", + " mul = wf.add(python.define(Mul, outputs={\"out\": float})(x=add.z, y=b))\n", + " divide = wf.add(Divide(x=wf[\"addition\"].lzout.z, y=mul.out), name=\"division\")\n", + "\n", + " # Alter one of the inputs to a node after it has been initialised\n", + " wf[\"Mul\"].inputs.y *= 2\n", + "\n", + " return mul.out, divide.divided" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Directly access the workflow being constructed also enables you to set the outputs of the workflow directly" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@workflow.define(outputs={\"out1\": float, \"out2\": float})\n", + "def SetOutputsOfWorkflow(a: int, b: float):\n", + " \"\"\"A test workflow demonstration a few alternative ways to set and connect nodes\n", + "\n", + " Args:\n", + " a: An integer input\n", + " b: A float input\n", + "\n", + " Returns:\n", + " out1: The first output\n", + " out2: The second output\n", + " \"\"\"\n", + "\n", + " wf = workflow.this()\n", + "\n", + " add = wf.add(Add(x=a, y=b), name=\"addition\")\n", + " mul = wf.add(python.define(Mul, outputs={\"out\": float})(x=add.z, y=b))\n", + " divide = wf.add(Divide(x=wf[\"addition\"].lzout.z, y=mul.out), name=\"division\")\n", + "\n", + " # Alter one of the inputs to a node after it has been initialised\n", + " wf[\"Mul\"].inputs.y *= 2\n", + "\n", + " # Set the outputs of the workflow directly\n", + " wf.outputs.out1 = mul.out\n", + " wf.outputs.out2 = divide.divided" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setting software environments per node\n", + "\n", + "The [Advanced execution tutorial](./2-advanced-execution.html) showed how the software\n", + "environment (e.g. Docker container) could be specified for shell tasks by passing the\n", + "`environment` variable to the task execution/submission call. For shell tasks\n", + "within workflows, the software environment used for them is specified when adding\n", + "a new workflow node, i.e." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import tempfile\n", + "from pathlib import Path\n", + "import numpy as np\n", + "from fileformats.medimage import Nifti1\n", + "import fileformats.medimage_mrtrix3 as mrtrix3\n", + "from pydra.engine.environments import Docker\n", + "from pydra.design import workflow, python\n", + "from pydra.tasks.mrtrix3.v3_0 import MrConvert, MrThreshold\n", + "\n", + "MRTRIX2NUMPY_DTYPES = {\n", + " \"Int8\": np.dtype(\"i1\"),\n", + " \"UInt8\": np.dtype(\"u1\"),\n", + " \"Int16LE\": np.dtype(\"i2\"),\n", + " \"UInt16LE\": np.dtype(\"u2\"),\n", + " \"Int32LE\": np.dtype(\"i4\"),\n", + " \"UInt32LE\": np.dtype(\"u4\"),\n", + " \"Float32LE\": np.dtype(\"f4\"),\n", + " \"Float64LE\": np.dtype(\"f8\"),\n", + " \"CFloat32LE\": np.dtype(\"c8\"),\n", + " \"CFloat64LE\": np.dtype(\"c16\"),\n", + "}\n", + "\n", + "\n", + "@workflow.define(outputs=[\"out_image\"])\n", + "def ToyMedianThreshold(in_image: Nifti1) -> mrtrix3.ImageFormat:\n", + " \"\"\"A toy example workflow that\n", + "\n", + " * converts a NIfTI image to MRTrix3 image format with a separate header\n", + " * loads the separate data file and selects the median value\n", + " \"\"\"\n", + "\n", + " input_conversion = workflow.add(\n", + " MrConvert(in_file=in_image, out_file=\"out_file.mih\"),\n", + " name=\"input_conversion\",\n", + " environment=Docker(\"mrtrix3/mrtrix3\", tag=\"latest\"),\n", + " )\n", + "\n", + " @python.define\n", + " def Median(mih: mrtrix3.ImageHeader) -> float:\n", + " \"\"\"A bespoke function that reads the separate data file in the MRTrix3 image\n", + " header format (i.e. .mih) and calculates the median value.\n", + "\n", + " NB: We could use a MrStats task here, but this is just an example to show how\n", + " to use a bespoke function in a workflow.\n", + " \"\"\"\n", + " dtype = MRTRIX2NUMPY_DTYPES[mih.metadata[\"datatype\"].strip()]\n", + " data = np.frombuffer(Path.read_bytes(mih.data_file), dtype=dtype)\n", + " return np.median(data)\n", + "\n", + " median = workflow.add(Median(mih=input_conversion.out_file))\n", + "\n", + " threshold = workflow.add(\n", + " MrThreshold(in_file=in_image, out_file=\"binary.mif\", abs=median.out),\n", + " environment=Docker(\"mrtrix3/mrtrix3\", tag=\"latest\"),\n", + " )\n", + "\n", + " output_conversion = workflow.add(\n", + " MrConvert(in_file=threshold.out_file, out_file=\"out_image.mif\"),\n", + " name=\"output_conversion\",\n", + " environment=Docker(\"mrtrix3/mrtrix3\", tag=\"latest\"),\n", + " )\n", + "\n", + " return output_conversion.out_file\n", + "\n", + "\n", + "test_dir = tempfile.mkdtemp()\n", + "\n", + "nifti_file = Nifti1.sample(test_dir, seed=0)\n", + "\n", + "wf = ToyMedianThreshold(in_image=nifti_file)\n", + "\n", + "outputs = wf()\n", + "\n", + "print(outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "See [Containers and Environments](../explanation/environments.rst) for more details on\n", + "how to utilise containers and add support for other software environments." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/new-docs/source/tutorial/7-canonical-form.ipynb b/new-docs/source/tutorial/7-canonical-form.ipynb new file mode 100644 index 0000000000..d270949a1d --- /dev/null +++ b/new-docs/source/tutorial/7-canonical-form.ipynb @@ -0,0 +1,262 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Canonical task form\n", + "\n", + "Under the hood, all Python, shell and workflow task definitions generated by the\n", + "`pydra.design.*.define` decorators/functions are translated to\n", + "[dataclasses](https://docs.python.org/3/library/dataclasses.html) by the\n", + "[Attrs](https://www.attrs.org/en/stable/) package. While the more compact syntax described\n", + "in the [Python-tasks](./4-python.html), [Shell-tasks](./5-shell.html) and [Workflow](./6-workflow.html)\n", + "tutorials is convenient when designing tasks for specific use cases, it is too magical\n", + "for linters to follow. Therefore, when designing task definitions to be used by third\n", + "parties (e.g. `pydra-fsl`, `pydra-ants`), it is recommended to favour the more\n", + "explicit \"canonical\" dataclass form.\n", + "\n", + "The syntax of the canonical form is close to that used by the\n", + "[Attrs](https://www.attrs.org/en/stable/) package itself, with class type annotations\n", + "used to define the fields of the inputs and outputs of the task. Tasks defined in canonical\n", + "form can be statically type-checked by [MyPy](https://mypy-lang.org/)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Python-task definitions\n", + "\n", + "Python tasks in dataclass form are decorated by `pydra.design.python.define`\n", + "with inputs listed as type annotations. Outputs are similarly defined in a nested class\n", + "called `Outputs`. The function to be executed should be a staticmethod called `function`.\n", + "Default values can also be set directly, as with Attrs classes.\n", + "\n", + "In order to allow static type-checkers to check the type of outputs of tasks added\n", + "to workflows, it is also necessary to explicitly extend from the `pydra.engine.specs.PythonDef`\n", + "and `pydra.engine.specs.PythonOutputs` classes (they are otherwise set as bases by the\n", + "`define` method implicitly).
Thus the \"canonical form\" of Python task definition is as\n", + "follows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pprint import pprint\n", + "from pydra.engine.helpers import fields_dict\n", + "from pydra.engine.specs import PythonDef, PythonOutputs\n", + "from pydra.design import python\n", + "\n", + "\n", + "@python.define\n", + "class CanonicalPythonDef(PythonDef[\"CanonicalPythonDef.Outputs\"]):\n", + " \"\"\"Canonical Python task definition class for testing\n", + "\n", + " Args:\n", + " a: First input\n", + " to be inputted\n", + " b: Second input\n", + " \"\"\"\n", + "\n", + " a: int\n", + " b: float = 2.0 # set default value\n", + "\n", + " class Outputs(PythonOutputs):\n", + " \"\"\"\n", + " Args:\n", + " c: Sum of a and b\n", + " d: Product of a and b\n", + " \"\"\"\n", + "\n", + " c: float\n", + " d: float\n", + "\n", + " @staticmethod\n", + " def function(a, b):\n", + " return a + b, a / b\n", + "\n", + "pprint(fields_dict(CanonicalPythonDef))\n", + "pprint(fields_dict(CanonicalPythonDef.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To set additional attributes other than the type and default, such as `allowed_values`\n", + "and `validators`, `python.arg` and `python.out` can be used instead." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import attrs.validators\n", + "\n", + "\n", + "@python.define\n", + "class CanonicalPythonDef(PythonDef[\"CanonicalPythonDef.Outputs\"]):\n", + " \"\"\"Canonical Python task definition class for testing\n", + "\n", + " Args:\n", + " a: First input\n", + " to be inputted\n", + " b: Second input\n", + " \"\"\"\n", + "\n", + " a: int = python.arg(allowed_values=[1, 2, 3, 4, 5])\n", + " b: float = python.arg(default=2.0, validator=attrs.validators.not_(0))\n", + "\n", + " class Outputs(PythonOutputs):\n", + " \"\"\"\n", + " Args:\n", + " c: Sum of a and b\n", + " d: Product of a and b\n", + " \"\"\"\n", + "\n", + " c: float\n", + " d: float\n", + "\n", + " @staticmethod\n", + " def function(a, b):\n", + " return a + b, a / b\n", + "\n", + "pprint(fields_dict(CanonicalPythonDef))\n", + "pprint(fields_dict(CanonicalPythonDef.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Shell-task definitions\n", + "\n", + "The canonical form of shell tasks is the same as for Python tasks, except a string `executable`\n", + "attribute replaces the `function` staticmethod." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "from fileformats import generic\n", + "from pydra.design import shell\n", + "from pydra.engine.specs import ShellDef, ShellOutputs\n", + "from pydra.utils.typing import MultiInputObj\n", + "\n", + "\n", + "@shell.define\n", + "class CpWithSize(ShellDef[\"CpWithSize.Outputs\"]):\n", + "\n", + " executable = \"cp\"\n", + "\n", + " in_fs_objects: MultiInputObj[generic.FsObject]\n", + " recursive: bool = shell.arg(argstr=\"-R\")\n", + " text_arg: str = shell.arg(argstr=\"--text-arg\")\n", + " int_arg: int | None = shell.arg(argstr=\"--int-arg\")\n", + " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\")\n", + "\n", + " class Outputs(ShellOutputs):\n", + "\n", + " @staticmethod\n", + " def get_file_size(out_file: Path) -> int:\n", + " \"\"\"Calculate the file size\"\"\"\n", + " result = os.stat(out_file)\n", + " return result.st_size\n", + "\n", + " out_file: generic.File\n", + " out_file_size: int = shell.out(callable=get_file_size)\n", + "\n", + "\n", + "pprint(fields_dict(CpWithSize))\n", + "pprint(fields_dict(CpWithSize.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Workflow definitions\n", + "\n", + "Workflows can also be defined in canonical form, which is the same as for Python tasks\n", + "but with a staticmethod called `constructor` that constructs the workflow." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import python, workflow\n", + "from pydra.engine.specs import WorkflowDef, WorkflowOutputs\n", + "\n", + "# Example python task definitions\n", + "@python.define\n", + "def Add(a, b):\n", + " return a + b\n", + "\n", + "\n", + "@python.define\n", + "def Mul(a, b):\n", + " return a * b\n", + "\n", + "\n", + "@workflow.define\n", + "class CanonicalWorkflowDef(WorkflowDef[\"CanonicalWorkflowDef.Outputs\"]):\n", + "\n", + " @staticmethod\n", + " def a_converter(value):\n", + " if value is None:\n", + " return value\n", + " return float(value)\n", + "\n", + " a: int\n", + " b: float = workflow.arg(\n", + " help=\"A float input\",\n", + " converter=a_converter,\n", + " )\n", + "\n", + " @staticmethod\n", + " def constructor(a, b):\n", + " add = workflow.add(Add(a=a, b=b))\n", + " mul = workflow.add(Mul(a=add.out, b=b))\n", + " return mul.out\n", + "\n", + " class Outputs(WorkflowOutputs):\n", + " out: float" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py new file mode 100644 index 0000000000..d84d9ff4d3 --- /dev/null +++ b/new-docs/source/tutorial/tst.py @@ -0,0 +1,77 @@ +import tempfile +from pathlib import Path +import numpy as np +from fileformats.medimage import Nifti1 +import fileformats.medimage_mrtrix3 as mrtrix3 +from pydra.engine.environments import Docker +from pydra.design import workflow, python +from pydra.tasks.mrtrix3.v3_0 import MrConvert, MrThreshold + +MRTRIX2NUMPY_DTYPES = { + "Int8": np.dtype("i1"), + "UInt8": 
np.dtype("u1"), + "Int16LE": np.dtype("i2"), + "UInt16LE": np.dtype("u2"), + "Int32LE": np.dtype("i4"), + "UInt32LE": np.dtype("u4"), + "Float32LE": np.dtype("f4"), + "Float64LE": np.dtype("f8"), + "CFloat32LE": np.dtype("c8"), + "CFloat64LE": np.dtype("c16"), +} + + +@workflow.define(outputs=["out_image"]) +def ToyMedianThreshold(in_image: Nifti1) -> mrtrix3.ImageFormat: + """A toy example workflow that + + * converts a NIfTI image to MRTrix3 image format with a separate header + * loads the separate data file and selects the median value + """ + + input_conversion = workflow.add( + MrConvert(in_file=in_image, out_file="out_file.mih"), + name="input_conversion", + environment=Docker("mrtrix3/mrtrix3", tag="latest"), + ) + + @python.define + def Median(mih: mrtrix3.ImageHeader) -> float: + """A bespoke function that reads the separate data file in the MRTrix3 image + header format (i.e. .mih) and calculates the median value.""" + dtype = MRTRIX2NUMPY_DTYPES[mih.metadata["datatype"].strip()] + data = np.frombuffer(Path.read_bytes(mih.data_file), dtype=dtype) + return np.median(data) + + median = workflow.add(Median(mih=input_conversion.out_file)) + threshold = workflow.add( + MrThreshold(in_file=in_image, out_file="binary.mif", abs=median.out), + environment=Docker("mrtrix3/mrtrix3", tag="latest"), + ) + + output_conversion = workflow.add( + MrConvert(in_file=threshold.out_file, out_file="out_image.mif"), + name="output_conversion", + environment=Docker("mrtrix3/mrtrix3", tag="latest"), + ) + + return output_conversion.out_file + + +test_dir = tempfile.mkdtemp() + +nifti_file = Nifti1.sample(test_dir, seed=0) + +wf = ToyMedianThreshold(in_image=nifti_file) + +outputs = wf() + +print(outputs) diff --git a/new-docs/tst.py b/new-docs/tst.py new file mode 100644 index 0000000000..461b89d79a --- /dev/null +++ b/new-docs/tst.py @@ -0,0 +1,14 @@ +from fileformats.application import Json +from pydra.tasks.common import LoadJson + +# Create a sample JSON file to test +json_file = Json.sample() + +# Parameterise the task to load the JSON file +load_json = LoadJson(file=json_file) + +# Run the task +outputs = load_json(plugin="serial") + +# Print the output interface of the of the task (LoadJson.Outputs) +print(outputs) diff --git a/pydra/mark/tests/__init__.py b/new_file_1.txt similarity index 100% rename from pydra/mark/tests/__init__.py rename to new_file_1.txt diff --git a/new_file_2.txt b/new_file_2.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/newfile_tmp.txt b/newfile_tmp.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/notebooks/examples b/notebooks/examples new file mode 120000 index 0000000000..9c255e151f --- /dev/null +++ b/notebooks/examples @@ -0,0 +1 @@ +../new-docs/source/examples \ No newline at end of file diff --git a/notebooks/tutorial b/notebooks/tutorial new file mode 120000 index 0000000000..7d3c73de2d --- /dev/null +++ b/notebooks/tutorial @@ -0,0 +1 @@ +../new-docs/source/tutorial \ No newline at end of file diff --git a/pydra/__init__.py b/pydra/__init__.py deleted file mode 100644 index f704d670a5..0000000000 --- a/pydra/__init__.py +++ /dev/null @@ -1,49 +0,0 @@ -""" -The Pydra workflow engine. - -Pydra is a rewrite of the Nipype engine with mapping and joining as -first-class operations. It forms the core of the Nipype 2.0 ecosystem. - -""" - -# This call enables pydra.tasks to be used as a namespace package when installed -# in editable mode. In normal installations it has no effect. 
-__path__ = __import__("pkgutil").extend_path(__path__, __name__) - -import logging - -import __main__ -import attr - -from . import mark -from .engine import AuditFlag, ShellCommandTask, Submitter, Workflow, specs - -__all__ = ( - "Submitter", - "Workflow", - "AuditFlag", - "ShellCommandTask", - "specs", - "mark", -) - -try: - from ._version import __version__ -except ImportError: - pass - -logger = logging.getLogger("pydra") - - -def check_latest_version(): - import etelemetry - - return etelemetry.check_available_version("nipype/pydra", __version__, lgr=logger) - - -# Run telemetry on import for interactive sessions, such as IPython, Jupyter notebooks, Python REPL -if not hasattr(__main__, "__file__"): - from .engine.core import TaskBase - - if TaskBase._etelemetry_version_data is None: - TaskBase._etelemetry_version_data = check_latest_version() diff --git a/pydra/conftest.py b/pydra/conftest.py index 66a1d200fc..3deb3df341 100644 --- a/pydra/conftest.py +++ b/pydra/conftest.py @@ -20,7 +20,7 @@ def pytest_generate_tests(metafunc): if bool(shutil.which("sbatch")): Plugins = ["slurm"] else: - Plugins = ["cf"] + Plugins = ["debug"] # ["debug", "cf"] try: if metafunc.config.getoption("dask"): Plugins.append("dask") @@ -50,7 +50,7 @@ def pytest_generate_tests(metafunc): elif bool(shutil.which("sbatch")): Plugins = ["slurm"] else: - Plugins = ["cf"] + Plugins = ["debug"] # ["debug", "cf"] try: if metafunc.config.getoption("psij"): Plugins.append("psij-" + metafunc.config.getoption("psij")) diff --git a/pydra/design/base.py b/pydra/design/base.py new file mode 100644 index 0000000000..b1b86797b3 --- /dev/null +++ b/pydra/design/base.py @@ -0,0 +1,1086 @@ +import typing as ty +import types +import inspect +import re +import enum +from pathlib import Path +from copy import copy +from typing import Self +import attrs.validators +from attrs.converters import default_if_none +from fileformats.generic import File, FileSet +from pydra.utils.typing import TypeParser, is_optional, is_fileset_or_union, is_type +from pydra.engine.helpers import ( + from_list_if_single, + ensure_list, + PYDRA_ATTR_METADATA, + list_fields, + is_lazy, +) +from pydra.utils.typing import ( + MultiInputObj, + MultiInputFile, + MultiOutputObj, + MultiOutputFile, +) +from pydra.utils.hash import hash_function + + +if ty.TYPE_CHECKING: + from pydra.engine.specs import TaskDef, TaskOutputs + + +__all__ = [ + "Field", + "Arg", + "Out", + "ensure_field_objects", + "make_task_def", +] + + +class _Empty(enum.Enum): + + NO_DEFAULT = enum.auto() + + def __repr__(self): + return "NO_DEFAULT" + + def __bool__(self): + return False + + +NO_DEFAULT = _Empty.NO_DEFAULT # To provide a blank placeholder for the default field + + +def convert_default_value(value: ty.Any, self_: "Field") -> ty.Any: + """Ensure the default value has been coerced into the correct type""" + if value is NO_DEFAULT or isinstance(value, attrs.Factory): + return value + if self_.type is ty.Callable and isinstance(value, ty.Callable): + return value + if isinstance(self_, Out) and TypeParser.contains_type(FileSet, self_.type): + return value + return TypeParser[self_.type](self_.type, label=self_.name)(value) + + +def allowed_values_converter(value: ty.Iterable[str] | None) -> list[str] | None: + """Ensure the allowed_values field is a list of strings or None""" + if value is None: + return None + return list(value) + + +@attrs.define +class Requirement: + """Define a requirement for a task input field + + Parameters + ---------- + name : str + The name of the input 
field that is required + allowed_values : list[str], optional + The allowed values for the input field that is required, if not provided any + value is allowed + """ + + name: str + allowed_values: list[str] | None = attrs.field( + default=None, converter=allowed_values_converter + ) + + def satisfied(self, inputs: "TaskDef") -> bool: + """Check if the requirement is satisfied by the inputs""" + value = getattr(inputs, self.name) + field = {f.name: f for f in list_fields(inputs)}[self.name] + if value is None or field.type is bool and value is False: + return False + if self.allowed_values is None: + return True + return value in self.allowed_values + + @classmethod + def parse(cls, value: ty.Any) -> Self: + if isinstance(value, Requirement): + return value + elif isinstance(value, str): + return Requirement(value) + elif isinstance(value, tuple): + name, allowed_values = value + if isinstance(allowed_values, str) or not isinstance( + allowed_values, ty.Collection + ): + raise ValueError( + f"allowed_values must be a collection of strings, not {allowed_values}" + ) + return Requirement(name, allowed_values) + else: + raise ValueError( + f"Requirements must be a input field name, a tuple of an input field " + f"name and allowed values or a Requirement object, not {value!r}" + ) + + def __str__(self): + if not self.allowed_values: + return self.name + return f"{self.name}(" + ",".join(repr(v) for v in self.allowed_values) + ")" + + +def requirements_converter(value: ty.Any) -> list[Requirement]: + """Ensure the requires field is a list of Requirement objects""" + if isinstance(value, Requirement): + return [value] + elif isinstance(value, (str, tuple)): + try: + return [Requirement.parse(value)] + except ValueError as e: + e.add_note( + f"Parsing requirements specification {value!r} as a single requirement" + ) + raise e + try: + return [Requirement.parse(v) for v in value] + except ValueError as e: + e.add_note( + f"Parsing requirements specification {value!r} as a set of concurrent " + "requirements (i.e. logical AND)" + ) + raise e + + +@attrs.define +class RequirementSet: + """Define a set of requirements for a task input field, all of which must be satisfied""" + + requirements: list[Requirement] = attrs.field( + factory=list, + converter=requirements_converter, + ) + + def satisfied(self, inputs: "TaskDef") -> bool: + """Check if all the requirements are satisfied by the inputs""" + return all(req.satisfied(inputs) for req in self.requirements) + + def __str__(self): + if len(self.requirements) == 1: + return str(self.requirements[0]) + return "+".join(str(r) for r in self.requirements) + + def __iter__(self): + return iter(self.requirements) + + def __iadd__(self, other: "RequirementSet | list[Requirement]") -> "RequirementSet": + self.requirements.extend(requirements_converter(other)) + return self + + +def requires_converter( + value: ( + str + | ty.Collection[ + Requirement | str | ty.Collection[str | tuple[str, ty.Collection[ty.Any]]] + ] + ), +) -> list[RequirementSet]: + """Ensure the requires field is a tuple of tuples""" + if isinstance(value, (str, tuple, Requirement)): + try: + return [RequirementSet(value)] + except ValueError as e: + e.add_note( + f"Parsing requirements set specification {value!r} as a single requirement set" + ) + raise e + try: + return [RequirementSet(v) for v in value] + except ValueError as e: + e.add_note( + f"Parsing requirements set specification {value!r} as a set of alternative " + "requirements (i.e. 
logical OR)" + ) + raise e + + +@attrs.define(kw_only=True) +class Field: + """Base class for input and output fields to task definitions + + Parameters + ---------- + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + type: type, optional + The type of the field, by default it is Any + from name to field, by default it is None + default : Any, optional + the default value for the field, by default it is NO_DEFAULT + help: str, optional + A short description of the input field. + requires: str | list[str | list[str] | Requirement], optional + The input fields that are required to be provided, along with the optional allowed + values, that are required together with the field. Can be provided + as a single name, a collection of names, a collection of collections of names, + or a collection of collection of name/allowed values pairs. + converter: callable, optional + The converter for the field passed through to the attrs.field, by default it is None + validator: callable | iterable[callable], optional + The validator(s) for the field passed through to the attrs.field, by default it is None + hash_eq: bool, optional + Whether to use the hash of the value for equality comparison, by default it is False + """ + + name: str | None = None + type: ty.Type[ty.Any] = attrs.field( + validator=is_type, default=ty.Any, converter=default_if_none(ty.Any) + ) + default: ty.Any = attrs.field( + default=NO_DEFAULT, + converter=attrs.Converter(convert_default_value, takes_self=True), + ) + help: str = "" + requires: list[RequirementSet] = attrs.field( + factory=list, converter=requires_converter + ) + converter: ty.Callable[..., ty.Any] | None = None + validator: ty.Callable[..., bool] | None = None + hash_eq: bool = False + + def requirements_satisfied(self, inputs: "TaskDef") -> bool: + """Check if all the requirements are satisfied by the inputs""" + return any(req.satisfied(inputs) for req in self.requires) + + @property + def mandatory(self): + return self.default is NO_DEFAULT + + @requires.validator + def _requires_validator(self, _, value): + if value and self.type not in (ty.Any, bool) and not is_optional(self.type): + raise ValueError( + f"Fields with requirements must be of optional type (i.e. in union " + f"with None) or boolean, not type {self.type} ({self!r})" + ) + + +@attrs.define(kw_only=True) +class Arg(Field): + """Base class for input fields of task definitions + + Parameters + ---------- + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + type: type, optional + The type of the field, by default it is Any + default : Any, optional + the default value for the field, by default it is NO_DEFAULT + help: str + A short description of the input field. + requires: list, optional + Names of the inputs that are required together with the field. + allowed_values: Sequence, optional + List of allowed values for the field. 
+ copy_mode: File.CopyMode, optional + The mode of copying the file, by default it is File.CopyMode.any + copy_collation: File.CopyCollation, optional + The collation of the file, by default it is File.CopyCollation.any + copy_ext_decomp: File.ExtensionDecomposition, optional + The extension decomposition of the file, by default it is + File.ExtensionDecomposition.single + readonly: bool, optional + If True the input field can’t be provided by the user but it aggregates other + input fields (for example the fields with argstr: -o {fldA} {fldB}), by default + it is False + """ + + allowed_values: frozenset = attrs.field(default=(), converter=frozenset) + copy_mode: File.CopyMode = File.CopyMode.any + copy_collation: File.CopyCollation = File.CopyCollation.any + copy_ext_decomp: File.ExtensionDecomposition = File.ExtensionDecomposition.single + readonly: bool = False + + +@attrs.define(kw_only=True, slots=False) +class Out(Field): + """Base class for output fields of task definitions + + Parameters + ---------- + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + type: type, optional + The type of the field, by default it is Any + default : Any, optional + the default value for the field, by default it is NO_DEFAULT + help: str, optional + A short description of the input field. + requires: list, optional + Names of the inputs that are required together with the field. + converter: callable, optional + The converter for the field passed through to the attrs.field, by default it is None + validator: callable | iterable[callable], optional + The validator(s) for the field passed through to the attrs.field, by default it is None + """ + + pass + + +def extract_fields_from_class( + spec_type: type["TaskDef"], + outputs_type: type["TaskOutputs"], + klass: type, + arg_type: type[Arg], + out_type: type[Out], + auto_attribs: bool, +) -> tuple[dict[str, Arg], dict[str, Out]]: + """Extract the input and output fields from an existing class + + Parameters + ---------- + klass : type + The class to extract the fields from + arg_type : type + The type of the input fields + out_type : type + The type of the output fields + auto_attribs : bool + Whether to assume that all attribute annotations should be interpreted as + fields or not + + Returns + ------- + inputs : dict[str, Arg] + The input fields extracted from the class + outputs : dict[str, Out] + The output fields extracted from the class + """ + + input_helps, _ = parse_doc_string(klass.__doc__) + + def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]: + """Get the fields from a class""" + fields_dict = {} + # Get fields defined in base classes if present + for field in list_fields(klass): + fields_dict[field.name] = field + type_hints = ty.get_type_hints(klass) + for atr_name in dir(klass): + if atr_name in ["Task", "Outputs"] or atr_name.startswith("__"): + continue + try: + atr = getattr(klass, atr_name) + except Exception: + continue + if isinstance(atr, Field): + atr.name = atr_name + fields_dict[atr_name] = atr + if atr_name in type_hints: + atr.type = type_hints[atr_name] + if not atr.help: + atr.help = helps.get(atr_name, "") + elif atr_name in type_hints: + if atr_name.startswith("_"): + continue + if atr_name in fields_dict: + fields_dict[atr_name].type = type_hints[atr_name] + elif auto_attribs: + fields_dict[atr_name] = field_type( + name=atr_name, + type=type_hints[atr_name], + default=atr, + help=helps.get(atr_name, 
""), + ) + if auto_attribs: + for atr_name, type_ in type_hints.items(): + if atr_name.startswith("_"): + continue + if atr_name not in list(fields_dict) + ["Task", "Outputs"]: + fields_dict[atr_name] = field_type( + name=atr_name, type=type_, help=helps.get(atr_name, "") + ) + return fields_dict + + if not issubclass(klass, spec_type): + raise ValueError( + f"When using the canonical form for {spec_type.__module__.split('.')[-1]} " + f"tasks, {klass} must inherit from {spec_type}" + ) + + inputs = get_fields(klass, arg_type, auto_attribs, input_helps) + + try: + outputs_klass = klass.Outputs + except AttributeError: + raise AttributeError( + f"Nested Outputs class not found in {klass.__name__}" + ) from None + if not issubclass(outputs_klass, outputs_type): + raise ValueError( + f"When using the canonical form for {outputs_type.__module__.split('.')[-1]} " + f"task outputs {outputs_klass}, you must inherit from {outputs_type}" + ) + + output_helps, _ = parse_doc_string(outputs_klass.__doc__) + outputs = get_fields(outputs_klass, out_type, auto_attribs, output_helps) + + return inputs, outputs + + +def make_task_def( + spec_type: type["TaskDef"], + out_type: type["TaskOutputs"], + inputs: dict[str, Arg], + outputs: dict[str, Out], + klass: type | None = None, + name: str | None = None, + bases: ty.Sequence[type] = (), + outputs_bases: ty.Sequence[type] = (), + xor: ty.Sequence[str | None] | ty.Sequence[ty.Sequence[str | None]] = (), +): + """Create a task definition class and its outputs definition class from the + input and output fields provided to the decorator/function. + + Modifies the class so that its attributes are converted from pydra fields to attrs fields + and then calls `attrs.define` to create an attrs class (dataclass-like). + on + + Parameters + ---------- + task_type : type + The type of the task to be created + inputs : dict[str, Arg] + The input fields of the task + outputs : dict[str, Out] + The output fields of the task + klass : type, optional + The class to be decorated, by default None + name : str, optional + The name of the class, by default + bases : ty.Sequence[type], optional + The base classes for the task definition class, by default () + outputs_bases : ty.Sequence[type], optional + The base classes for the outputs definition class, by default () + xor: Sequence[str | None] | Sequence[Sequence[str | None]], optional + Names of args that are exclusive mutually exclusive, which must include + the name of the current field. If this list includes None, then none of the + fields need to be set. 
+ + Returns + ------- + klass : type + The class created using the attrs package + """ + + # Convert a single xor set into a set of xor sets + if not xor: + xor = frozenset() + elif all(isinstance(x, str) or x is None for x in xor): + xor = frozenset([frozenset(xor)]) + else: + xor = frozenset(frozenset(x) for x in xor) + + spec_type._check_arg_refs(inputs, outputs, xor) + + # Check that the field attributes are valid after all fields have been set + # (especially the type) + for inpt in inputs.values(): + attrs.validate(inpt) + for outpt in outputs.values(): + attrs.validate(outpt) + + if name is None and klass is not None: + name = klass.__name__ + if reserved_names := [n for n in inputs if n in spec_type.RESERVED_FIELD_NAMES]: + raise ValueError( + f"{reserved_names} are reserved and cannot be used for {spec_type} field names" + ) + outputs_klass = make_outputs_spec(out_type, outputs, outputs_bases, name) + if klass is None: + if name is None: + raise ValueError("name must be provided if klass is not") + bases = tuple(bases) + # Ensure that TaskDef is a base class + if not any(issubclass(b, spec_type) for b in bases): + bases = bases + (spec_type,) + # If building from a decorated class (as opposed to dynamically from a function + # or shell-template), add any base classes not already in the bases tuple + if klass is not None: + bases += tuple(c for c in klass.__mro__ if c not in bases + (object,)) + # Create a new class with the TaskDef as a base class + klass = types.new_class( + name=name, + bases=bases, + kwds={}, + exec_body=lambda ns: ns.update({"Outputs": outputs_klass}), + ) + else: + # Ensure that the class has it's own annotations dict so we can modify it without + # messing up other classes + klass.__annotations__ = copy(klass.__annotations__) + klass.Outputs = outputs_klass + # Now that we have saved the attributes in lists to be + for arg in inputs.values(): + # If an outarg input then the field type should be Path not a FileSet + attrs_kwargs = _get_attrs_kwargs(arg) + if isinstance(arg, Out) and is_fileset_or_union(arg.type): + if getattr(arg, "path_template", False): + if is_optional(arg.type): + field_type = Path | bool | None + if arg.default is NO_DEFAULT: + attrs_kwargs["default"] = True if arg.requires else None + del attrs_kwargs["factory"] + else: + field_type = Path | bool + if arg.default is NO_DEFAULT: + attrs_kwargs["default"] = True # use the template by default + del attrs_kwargs["factory"] + elif is_optional(arg.type): + field_type = Path | None + else: + field_type = Path + else: + field_type = arg.type + setattr( + klass, + arg.name, + attrs.field( + converter=make_converter(arg, klass.__name__, field_type), + validator=make_validator(arg, klass.__name__), + metadata={PYDRA_ATTR_METADATA: arg}, + on_setattr=attrs.setters.convert, + **attrs_kwargs, + ), + ) + # Store the xor sets for the class + klass._xor = xor + klass.__annotations__[arg.name] = field_type + + # Create class using attrs package, will create attributes for all columns and + # parameters + attrs_klass = attrs.define(auto_attribs=False, kw_only=True, eq=False)(klass) + + return attrs_klass + + +def make_outputs_spec( + spec_type: type["TaskOutputs"], + outputs: dict[str, Out], + bases: ty.Sequence[type], + spec_name: str, +) -> type["TaskOutputs"]: + """Create an outputs definition class and its outputs definition class from the + output fields provided to the decorator/function. 
+ + Creates a new class with attrs fields and then calls `attrs.define` to create an + attrs class (dataclass-like). + + Parameters + ---------- + outputs : dict[str, Out] + The output fields of the task + bases : ty.Sequence[type], optional + The base classes for the outputs definition class, by default () + spec_name : str + The name of the task definition class the outputs are for + + Returns + ------- + klass : type + The class created using the attrs package + """ + from pydra.engine.specs import TaskOutputs + + if not any(issubclass(b, spec_type) for b in bases): + if out_spec_bases := [b for b in bases if issubclass(b, TaskOutputs)]: + raise ValueError( + f"Cannot make {spec_type} output definition from {out_spec_bases} bases" + ) + outputs_bases = bases + (spec_type,) + if reserved_names := [n for n in outputs if n in spec_type.RESERVED_FIELD_NAMES]: + raise ValueError( + f"{reserved_names} are reserved and cannot be used for {spec_type} field names" + ) + # Add in any fields in base classes that haven't already been converted into attrs + # fields (e.g. stdout, stderr and return_code) + for base in outputs_bases: + base_outputs = { + n: o + for n, o in base.__dict__.items() + if isinstance(o, Out) and n not in outputs + } + for name, field in base_outputs.items(): + field.name = name + field.type = base.__annotations__.get(name, ty.Any) + outputs.update(base_outputs) + assert all(o.name == n for n, o in outputs.items()) + outputs_klass = type( + spec_name + "Outputs", + tuple(outputs_bases), + { + n: attrs.field( + converter=make_converter(o, f"{spec_name}.Outputs"), + metadata={PYDRA_ATTR_METADATA: o}, + **_get_attrs_kwargs(o), + ) + for n, o in outputs.items() + }, + ) + outputs_klass.__annotations__.update((o.name, o.type) for o in outputs.values()) + outputs_klass = attrs.define(auto_attribs=False, kw_only=True, eq=False)( + outputs_klass + ) + return outputs_klass + + +def ensure_field_objects( + arg_type: type[Arg], + out_type: type[Out], + doc_string: str | None = None, + inputs: dict[str, Arg | type] | None = None, + outputs: dict[str, Out | type] | None = None, + input_helps: dict[str, str] | None = None, + output_helps: dict[str, str] | None = None, +) -> tuple[dict[str, Arg], dict[str, Out]]: + """Converts dicts containing input/output types into input/output, including any + help strings to the appropriate inputs and outputs + + Parameters + ---------- + arg_type : type + The type of the input fields + out_type : type + The type of the output fields + doc_string : str, optional + The docstring of the function or class + inputs : dict[str, Arg | type], optional + The inputs to the function or class + outputs : dict[str, Out | type], optional + The outputs of the function or class + input_helps : dict[str, str], optional + The help strings for the inputs + output_helps : dict[str, str], optional + The help strings for the outputs + + Returns + ------- + inputs : dict[str, Arg] + The input fields with help strings added + outputs : dict[str, Out] + The output fields with help strings added + """ + + for input_name, arg in list(inputs.items()): + if isinstance(arg, Arg): + if arg.name is None: + arg.name = input_name + elif arg.name != input_name: + raise ValueError( + "Name of the argument must be the same as the key in the " + f"dictionary. 
The argument name is {arg.name} and the key " + f"is {input_name}" + ) + else: + arg.name = input_name + if not arg.help: + arg.help = input_helps.get(input_name, "") + elif is_type(arg): + inputs[input_name] = arg_type( + type=arg, + name=input_name, + help=input_helps.get(input_name, ""), + ) + elif isinstance(arg, dict): + arg_kwds = copy(arg) + if "help" not in arg_kwds: + arg_kwds["help"] = input_helps.get(input_name, "") + inputs[input_name] = arg_type( + name=input_name, + **arg_kwds, + ) + else: + raise ValueError( + f"Input {input_name} must be an instance of {Arg}, a type, or a dictionary " + f" of keyword arguments to pass to {Arg}, not {arg}" + ) + + for output_name, out in list(outputs.items()): + if isinstance(out, Out): + if out.name is None: + out.name = output_name + elif out.name != output_name: + raise ValueError( + "Name of the argument must be the same as the key in the " + f"dictionary. The argument name is {out.name} and the key " + f"is {output_name}" + ) + else: + out.name = output_name + if not out.help: + out.help = output_helps.get(output_name, "") + elif inspect.isclass(out) or ty.get_origin(out): + outputs[output_name] = out_type( + type=out, + name=output_name, + help=output_helps.get(output_name, ""), + ) + elif isinstance(out, dict): + out_kwds = copy(out) + if "help" not in out_kwds: + out_kwds["help"] = output_helps.get(output_name, "") + outputs[output_name] = out_type( + name=output_name, + **out_kwds, + ) + elif isinstance(out, ty.Callable) and hasattr(out_type, "callable"): + outputs[output_name] = out_type( + name=output_name, + type=ty.get_type_hints(out).get("return", ty.Any), + callable=out, + help=re.split(r"\n\s*\n", out.__doc__)[0] if out.__doc__ else "", + ) + else: + raise ValueError( + f"Unrecognised value provided to outputs ({arg}), can be either {out_type} " + "type" + (" or callable" if hasattr(out_type, "callable") else "") + ) + + return inputs, outputs + + +def make_converter( + field: Field, interface_name: str, field_type: ty.Type | None = None +) -> ty.Callable[..., ty.Any]: + """Makes an attrs converter for the field, combining type checking with any explicit + converters + + Parameters + ---------- + field : Field + The field to make the converter for + interface_name : str + The name of the interface the field is part of + field_type : type, optional + The type of the field, by default None + + Returns + ------- + converter : callable + The converter for the field + """ + if field_type is None: + field_type = field.type + checker_label = f"'{field.name}' field of {interface_name} interface" + type_checker = TypeParser[field_type]( + field_type, label=checker_label, superclass_auto_cast=True + ) + converters = [] + if field_type in (MultiInputObj, MultiInputFile): + converters.append(ensure_list) + elif field_type in (MultiOutputObj, MultiOutputFile): + converters.append(from_list_if_single) + if field.converter: + converters.append(field.converter) + if converters: + converters.append(type_checker) + converter = attrs.converters.pipe(*converters) + else: + converter = type_checker + return converter + + +def make_validator(field: Field, interface_name: str) -> ty.Callable[..., None] | None: + """Makes an attrs validator for the field, combining allowed values and any explicit + validators + + Parameters + ---------- + field : Field + The field to make the validator for + interface_name : str + The name of the interface the field is part of + + Returns + ------- + validator : callable + The validator for the field + """ + 
validators = [] + if field.allowed_values: + validators.append(allowed_values_validator) + if isinstance(field.validator, ty.Iterable): + validators.extend(field.validator) + elif field.validator: + validators.append(field.validator) + if len(validators) > 1: + return validators + elif validators: + return validators[0] + return None + + +def allowed_values_validator(_, attribute, value): + """checking if the values is in allowed_values""" + allowed = attribute.metadata[PYDRA_ATTR_METADATA].allowed_values + if value is attrs.NOTHING or is_lazy(value): + pass + elif value is None and is_optional(attribute.type): + pass + elif value not in allowed: + raise ValueError( + f"value of {attribute.name} has to be from {allowed}, but {value} provided" + ) + + +def extract_function_inputs_and_outputs( + function: ty.Callable, + arg_type: type[Arg], + inputs: list[str | Arg] | dict[str, Arg | type] | None = None, + outputs: list[str | Out] | dict[str, Out | type] | type | None = None, +) -> tuple[dict[str, type | Arg], dict[str, type | Out]]: + """Extract input output types and output names from the function source if they + aren't explicitly + + Parameters + ---------- + function : callable + The function to extract the inputs and outputs from + arg_type : type + The type of the input fields + out_type : type + The type of the output fields + inputs : list[str | Arg] | dict[str, Arg | type] | None + The inputs to the function + outputs : list[str | Out] | dict[str, Out | type] | type | None + The outputs of the function + + Returns + ------- + inputs : dict[str, Arg] + The input fields extracted from the function + outputs : dict[str, Out] + The output fields extracted from the function + """ + # if undefined_symbols := get_undefined_symbols( + # function, exclude_signature_type_hints=True, ignore_decorator=True + # ): + # raise ValueError( + # f"The following symbols are not defined within the scope of the function " + # f"{function!r}, {undefined_symbols}. 
Ensure that all imports are " + # "defined within the function scope so it is portable" + # ) + sig = inspect.signature(function) + type_hints = ty.get_type_hints(function) + input_types = {} + input_defaults = {} + has_varargs = False + for p in sig.parameters.values(): + if p.kind is p.VAR_POSITIONAL or p.kind is p.VAR_KEYWORD: + has_varargs = True + continue + input_types[p.name] = type_hints.get(p.name, ty.Any) + if p.default is not inspect.Parameter.empty: + input_defaults[p.name] = p.default + if inputs: + if not isinstance(inputs, dict): + raise ValueError( + f"Input names ({inputs}) should not be provided when " + "wrapping/decorating a function as " + ) + if not has_varargs: + if unrecognised := set(inputs) - set(input_types): + raise ValueError( + f"Unrecognised input names ({unrecognised}) not present in the signature " + f"of the function {function!r}" + ) + for inpt_name, type_ in input_types.items(): + try: + inpt = inputs[inpt_name] + except KeyError: + inputs[inpt_name] = type_ + else: + if isinstance(inpt, Arg) and inpt.type is ty.Any: + inpt.type = type_ + else: + inputs = input_types + for inpt_name, default in input_defaults.items(): + inpt = inputs[inpt_name] + if isinstance(inpt, arg_type): + if inpt.default is NO_DEFAULT: + inpt.default = default + elif inspect.isclass(inpt) or ty.get_origin(inpt): + inputs[inpt_name] = arg_type(type=inpt, default=default) + else: + raise ValueError( + f"Unrecognised input type ({inpt}) for input {inpt_name} with default " + f"value {default}" + ) + return_type = type_hints.get("return", ty.Any) + if outputs and len(outputs) > 1: + if return_type is not ty.Any: + if ty.get_origin(return_type) is not tuple: + raise ValueError( + f"Multiple outputs specified ({outputs}) but non-tuple " + f"return value {return_type}" + ) + return_types = ty.get_args(return_type) + if len(return_types) != len(outputs): + raise ValueError( + f"Length of the outputs ({outputs}) does not match that " + f"of the return types ({return_types})" + ) + output_types = dict(zip(outputs, return_types)) + else: + output_types = {o: ty.Any for o in outputs} + if isinstance(outputs, dict): + for output_name, output in outputs.items(): + if isinstance(output, Out) and output.type is ty.Any: + output.type = output_types[output_name] + else: + outputs = output_types + + elif outputs: + if isinstance(outputs, dict): + output_name, output = next(iter(outputs.items())) + elif isinstance(outputs, list): + output_name = outputs[0] + output = ty.Any + if isinstance(output, Out): + if output.type is ty.Any: + output.type = return_type + elif output is ty.Any: + output = return_type + outputs = {output_name: output} + else: + outputs = {"out": return_type} + return inputs, outputs + + +def parse_doc_string(doc_str: str) -> tuple[dict[str, str], dict[str, str] | list[str]]: + """Parse the docstring to pull out the description of the parameters/args and returns + + Parameters + ----------- + doc_string + the doc string to parse + + Returns + ------- + input_helps + the documentation for each of the parameter/args of the class/function + output_helps + the documentation for each of the return values of the class function, if no + names are provided then the help strings are returned as a list + """ + input_helps = {} + output_helps = {} + if doc_str is None: + return input_helps, output_helps + for param, param_help in re.findall(r":param (\w+): (.*)", doc_str): + input_helps[param] = param_help + for return_val, return_help in re.findall(r":return (\w+): (.*)", doc_str): + 
output_helps[return_val] = return_help + google_args_match = re.match( + r".*\n\s*Args:\n(.*)", doc_str, flags=re.DOTALL | re.MULTILINE + ) + google_returns_match = re.match( + r".*\n\s*Returns:\n(.*)", doc_str, flags=re.DOTALL | re.MULTILINE + ) + if google_args_match: + args_str = google_args_match.group(1) + for arg_str in split_block(args_str): + arg_name, arg_help = arg_str.split(":", maxsplit=1) + arg_name = arg_name.strip() + arg_help = white_space_re.sub(" ", arg_help).strip() + input_helps[arg_name] = arg_help + if google_returns_match: + returns_str = google_returns_match.group(1) + for return_str in split_block(returns_str): + return_name, return_help = return_str.split(":", maxsplit=1) + return_name = return_name.strip() + return_help = white_space_re.sub(" ", return_help).strip() + output_helps[return_name] = return_help + numpy_args_match = re.match( + r".*\n\s+Parameters\n\s*----------\s*\n(.*)", + doc_str, + flags=re.DOTALL | re.MULTILINE, + ) + numpy_returns_match = re.match( + r".*\n\s+Returns\n\s*-------\s*\n(.*)", doc_str, flags=re.DOTALL | re.MULTILINE + ) + if numpy_args_match: + args_str = numpy_args_match.group(1) + for arg_str in split_block(args_str): + arg_decl, arg_help = arg_str.split("\n", maxsplit=1) + arg_name = arg_decl.split(":")[0].strip() + arg_help = white_space_re.sub(" ", arg_help).strip() + input_helps[arg_name] = arg_help + if numpy_returns_match: + returns_str = numpy_returns_match.group(1) + for return_str in split_block(returns_str): + return_decl, return_help = return_str.split("\n", maxsplit=1) + return_name = return_decl.split(":")[0].strip() + return_help = white_space_re.sub(" ", return_help).strip() + output_helps[return_name] = return_help + return input_helps, output_helps + + +def split_block(string: str) -> ty.Generator[str, None, None]: + """Split a block of text into groups lines""" + indent_re = re.compile(r"^\s*") + leading_indent = indent_re.match(string).group() + leading_indent_len = len(leading_indent) + block = "" + for line in string.split("\n"): + if not line.strip(): + break + indent_len = len(indent_re.match(line).group()) + if block and indent_len == leading_indent_len: + yield block.strip() + block = "" + block += line + "\n" + if indent_len < leading_indent_len: + raise ValueError( + f"Indentation block is not consistent in docstring:\n{string}" + ) + if block: + yield block.strip() + + +def check_explicit_fields_are_none(klass, inputs, outputs): + if inputs is not None: + raise ValueError( + f"inputs should not be provided to `python.task` ({inputs}) " + f"explicitly when decorated a class ({klass})" + ) + if outputs is not None: + raise ValueError( + f"outputs should not be provided to `python.task` ({outputs}) " + f"explicitly when decorated a class ({klass})" + ) + + +def _get_attrs_kwargs(field: Field) -> dict[str, ty.Any]: + kwargs = {} + if field.default is not NO_DEFAULT: + kwargs["default"] = field.default + # elif is_optional(field.type): + # kwargs["default"] = None + else: + kwargs["factory"] = nothing_factory + if field.hash_eq: + kwargs["eq"] = hash_function + return kwargs + + +def nothing_factory(): + return attrs.NOTHING + + +# def set_none_default_if_optional(field: Field) -> None: +# if is_optional(field.type) and field.default is NO_DEFAULT: +# field.default = None + + +white_space_re = re.compile(r"\s+") diff --git a/pydra/design/boutiques.py b/pydra/design/boutiques.py new file mode 100644 index 0000000000..4fd8d43760 --- /dev/null +++ b/pydra/design/boutiques.py @@ -0,0 +1,219 @@ +import typing as 
ty +import json +import tempfile +from urllib.request import urlretrieve +from pathlib import Path +from functools import reduce +from fileformats.generic import File +from pydra.engine.specs import ShellDef +from pydra.design.base import make_task_def +from pydra.design import shell + + +class arg(shell.arg): + """Class for input fields of Boutiques task definitions + + Parameters + ---------- + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + type: type, optional + The type of the field, by default it is Any + default : Any, optional + the default value for the field, by default it is NO_DEFAULT + help: str + A short description of the input field. + allowed_values: list, optional + List of allowed values for the field. + requires: list, optional + Names of the inputs that are required together with the field. + copy_mode: File.CopyMode, optional + The mode of copying the file, by default it is File.CopyMode.any + copy_collation: File.CopyCollation, optional + The collation of the file, by default it is File.CopyCollation.any + copy_ext_decomp: File.ExtensionDecomposition, optional + The extension decomposition of the file, by default it is + File.ExtensionDecomposition.single + readonly: bool, optional + If True the input field can’t be provided by the user but it aggregates other + input fields (for example the fields with argstr: -o {fldA} {fldB}), by default + it is False + """ + + +class out(shell.out): + """Class for output fields of Boutiques task definitions + + Parameters + ---------- + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + type: type, optional + The type of the field, by default it is Any + default : Any, optional + the default value for the field, by default it is NO_DEFAULT + help: str, optional + A short description of the input field. + requires: list, optional + Names of the inputs that are required together with the field. + converter: callable, optional + The converter for the field passed through to the attrs.field, by default it is None + validator: callable | iterable[callable], optional + The validator(s) for the field passed through to the attrs.field, by default it is None + """ + + +def define( + zenodo_id=None, + bosh_file=None, + input_spec_names: list[str] | None = None, + output_spec_names: list[str] | None = None, +): + """ + Initialize this task. + + Parameters + ---------- + zenodo_id: :obj: str + Zenodo ID + bosh_file : : str + json file with the boutiques descriptors + audit_flags : :obj:`pydra.utils.messenger.AuditFlag` + Auditing configuration + cache_dir : :obj:`os.pathlike` + Cache directory + input_spec_names : :obj: list + Input names for input_spec. + messenger_args : + TODO + messengers : + TODO + name : :obj:`str` + Name of this task. + output_spec_names : :obj: list + Output names for output_spec. 
+ strip : :obj:`bool` + TODO + + """ + if (bosh_file and zenodo_id) or not (bosh_file or zenodo_id): + raise Exception("either bosh or zenodo_id has to be specified") + elif zenodo_id: + bosh_file = _download_spec(zenodo_id) + + with bosh_file.open() as f: + bosh_spec = json.load(f) + + inputs, input_keys = _prepare_input_spec(bosh_spec, names_subset=input_spec_names) + outputs = _prepare_output_spec( + bosh_spec, input_keys, names_subset=output_spec_names + ) + return make_task_def( + spec_type=ShellDef, + out_type=out, + arg_type=arg, + inputs=inputs, + outputs=outputs, + ) + + +def _download_spec(zenodo_id): + """ + using boutiques Searcher to find url of zenodo file for a specific id, + and download the file to self.cache_dir + """ + from boutiques.searcher import Searcher + + tmp_dir = Path(tempfile.mkdtemp()) + + searcher = Searcher(zenodo_id, exact_match=True) + hits = searcher.zenodo_search().json()["hits"]["hits"] + if len(hits) == 0: + raise Exception(f"can't find zenodo definition for {zenodo_id}") + elif len(hits) > 1: + raise Exception(f"too many hits for {zenodo_id}") + else: + zenodo_url = hits[0]["files"][0]["links"]["self"] + zenodo_file = tmp_dir / f"zenodo.{zenodo_id}.json" + urlretrieve(zenodo_url, zenodo_file) + return zenodo_file + + +def _prepare_input_spec(bosh_spec: dict[str, ty.Any], names_subset=None): + """creating input definition from the zenodo file + if name_subset provided, only names from the subset will be used in the definition + """ + binputs = bosh_spec["inputs"] + input_keys = {} + fields = [] + for input in binputs: + name = input["id"] + if names_subset is None: + pass + elif name not in names_subset: + continue + else: + names_subset.remove(name) + if input["type"] == "File": + tp = File + elif input["type"] == "String": + tp = str + elif input["type"] == "Number": + tp = float + elif input["type"] == "Flag": + tp = bool + else: + tp = None + # adding list + if tp and "list" in input and input["list"]: + tp = ty.List[tp] + + fields.append( + arg( + name=name, + type=tp, + help=input.get("description", None) or input["name"], + mandatory=not input["optional"], + argstr=input.get("command-line-flag", None), + ) + ) + input_keys[input["value-key"]] = "{" + f"{name}" + "}" + if names_subset: + raise RuntimeError(f"{names_subset} are not in the zenodo input definition") + return fields, input_keys + + +def _prepare_output_spec(bosh_spec: dict[str, ty.Any], input_keys, names_subset=None): + """creating output definition from the zenodo file + if name_subset provided, only names from the subset will be used in the definition + """ + boutputs = bosh_spec["output-files"] + fields = [] + for output in boutputs: + name = output["id"] + if names_subset is None: + pass + elif name not in names_subset: + continue + else: + names_subset.remove(name) + path_template = reduce( + lambda s, r: s.replace(*r), + input_keys.items(), + output["path-template"], + ) + fields.append( + out( + name=name, + type=File, + help=output.get("description", None) or output["name"], + mandatory=not output["optional"], + output_file_template=path_template, + ) + ) + + if names_subset: + raise RuntimeError(f"{names_subset} are not in the zenodo output definition") + return fields diff --git a/pydra/design/python.py b/pydra/design/python.py new file mode 100644 index 0000000000..322a55a923 --- /dev/null +++ b/pydra/design/python.py @@ -0,0 +1,185 @@ +import typing as ty +import inspect +from typing import dataclass_transform +import attrs +from pydra.design.base import ( + Arg, + Out, + 
ensure_field_objects, + make_task_def, + parse_doc_string, + extract_function_inputs_and_outputs, + check_explicit_fields_are_none, + extract_fields_from_class, +) + +if ty.TYPE_CHECKING: + from pydra.engine.specs import PythonDef + +__all__ = ["arg", "out", "define"] + + +@attrs.define +class arg(Arg): + """Argument of a Python task definition + + Parameters + ---------- + help: str + A short description of the input field. + default : Any, optional + the default value for the argument + allowed_values: list, optional + List of allowed values for the field. + requires: list, optional + Names of the inputs that are required together with the field. + copy_mode: File.CopyMode, optional + The mode of copying the file, by default it is File.CopyMode.any + copy_collation: File.CopyCollation, optional + The collation of the file, by default it is File.CopyCollation.any + copy_ext_decomp: File.ExtensionDecomposition, optional + The extension decomposition of the file, by default it is + File.ExtensionDecomposition.single + readonly: bool, optional + If True the input field can’t be provided by the user but it aggregates other + input fields (for example the fields with argstr: -o {fldA} {fldB}), by default + it is False + type: type, optional + The type of the field, by default it is Any + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + """ + + pass + + +@attrs.define +class out(Out): + """Output of a Python task definition + + Parameters + ---------- + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + type: type, optional + The type of the field, by default it is Any + help: str, optional + A short description of the input field. + requires: list, optional + Names of the inputs that are required together with the field. + converter: callable, optional + The converter for the field passed through to the attrs.field, by default it is None + validator: callable | iterable[callable], optional + The validator(s) for the field passed through to the attrs.field, by default it is None + position : int + The position of the output in the output list, allows for tuple unpacking of + outputs + """ + + pass + + +@dataclass_transform( + kw_only_default=True, + field_specifiers=(out,), +) +def outputs(wrapped): + """Decorator to specify the output fields of a shell command is a dataclass-style type""" + return wrapped + + +@dataclass_transform( + kw_only_default=True, + field_specifiers=(arg,), +) +def define( + wrapped: type | ty.Callable | None = None, + /, + inputs: list[str | Arg] | dict[str, Arg | type] | None = None, + outputs: list[str | Out] | dict[str, Out | type] | type | None = None, + bases: ty.Sequence[type] = (), + outputs_bases: ty.Sequence[type] = (), + auto_attribs: bool = True, + xor: ty.Sequence[str | None] | ty.Sequence[ty.Sequence[str | None]] = (), +) -> "PythonDef": + """ + Create an interface for a function or a class. + + Parameters + ---------- + wrapped : type | callable | None + The function or class to create an interface for. + inputs : list[str | Arg] | dict[str, Arg | type] | None + The inputs to the function or class. + outputs : list[str | Out] | dict[str, Out | type] | type | None + The outputs of the function or class. + auto_attribs : bool + Whether to use auto_attribs mode when creating the class. 
+ xor: Sequence[str | None] | Sequence[Sequence[str | None]], optional + Names of args that are exclusive mutually exclusive, which must include + the name of the current field. If this list includes None, then none of the + fields need to be set. + + Returns + ------- + PythonDef + The task definition class for the Python function + """ + from pydra.engine.specs import PythonDef, PythonOutputs + + def make(wrapped: ty.Callable | type) -> PythonDef: + if inspect.isclass(wrapped): + klass = wrapped + function = klass.function + name = klass.__name__ + check_explicit_fields_are_none(klass, inputs, outputs) + parsed_inputs, parsed_outputs = extract_fields_from_class( + PythonDef, PythonOutputs, klass, arg, out, auto_attribs + ) + else: + if not isinstance(wrapped, ty.Callable): + raise ValueError( + f"wrapped must be a class or a function, not {wrapped!r}" + ) + klass = None + function = wrapped + input_helps, output_helps = parse_doc_string(function.__doc__) + inferred_inputs, inferred_outputs = extract_function_inputs_and_outputs( + function, arg, inputs, outputs + ) + name = function.__name__ + + parsed_inputs, parsed_outputs = ensure_field_objects( + arg_type=arg, + out_type=out, + inputs=inferred_inputs, + outputs=inferred_outputs, + input_helps=input_helps, + output_helps=output_helps, + ) + + parsed_inputs["function"] = arg( + name="function", type=ty.Callable, default=function, hash_eq=True + ) + + defn = make_task_def( + PythonDef, + PythonOutputs, + parsed_inputs, + parsed_outputs, + name=name, + klass=klass, + bases=bases, + outputs_bases=outputs_bases, + xor=xor, + ) + + return defn + + if wrapped is not None: + if not isinstance(wrapped, (ty.Callable, type)): + raise ValueError(f"wrapped must be a class or a callable, not {wrapped!r}") + return make(wrapped) + return make diff --git a/pydra/design/shell.py b/pydra/design/shell.py new file mode 100644 index 0000000000..098a87778d --- /dev/null +++ b/pydra/design/shell.py @@ -0,0 +1,774 @@ +"""Decorators and helper functions to create ShellTasks used in Pydra workflows""" + +from __future__ import annotations +import typing as ty +import re +import glob +from collections import defaultdict +import inspect +from copy import copy +import attrs +import builtins +from typing import dataclass_transform +from fileformats.core import from_mime +from fileformats import generic +from fileformats.core.exceptions import FormatRecognitionError +from pydra.engine.helpers import attrs_values +from pydra.design.base import ( + Arg, + Out, + check_explicit_fields_are_none, + extract_fields_from_class, + ensure_field_objects, + make_task_def, + NO_DEFAULT, +) +from pydra.utils.typing import ( + is_fileset_or_union, + MultiInputObj, + TypeParser, + is_optional, +) + +if ty.TYPE_CHECKING: + from pydra.engine.specs import ShellDef + +__all__ = ["arg", "out", "outarg", "define"] + +EXECUTABLE_HELP_STRING = ( + "the first part of the command, can be a string, " + "e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']" +) + + +@attrs.define(kw_only=True) +class arg(Arg): + """An input field that specifies a command line argument + + Parameters + ---------- + help: str + A short description of the input field. + default : Any, optional + the default value for the argument + mandatory: bool, optional + If True user has to provide a value for the field, by default it is False + allowed_values: list, optional + List of allowed values for the field. + requires: list, optional + List of field names that are required together with the field. 
+    copy_mode: File.CopyMode, optional
+        The mode of copying the file, by default it is File.CopyMode.any
+    copy_collation: File.CopyCollation, optional
+        The collation of the file, by default it is File.CopyCollation.any
+    copy_ext_decomp: File.ExtensionDecomposition, optional
+        The extension decomposition of the file, by default it is
+        File.ExtensionDecomposition.single
+    readonly: bool, optional
+        If True the input field can’t be provided by the user but it aggregates other
+        input fields (for example the fields with argstr: -o {fldA} {fldB}), by default
+        it is False
+    type: type, optional
+        The type of the field, by default it is Any
+    name: str, optional
+        The name of the field, used when specifying a list of fields instead of a mapping
+        from name to field, by default it is None
+    argstr: str, optional
+        A flag or string that is used in the command before the value, e.g. -v or
+        -v {inp_field}, but it could be an empty string, “”, in which case the value is
+        just printed to the command line. If … are used, e.g. -v…,
+        the flag is used before every element if a list is provided as a value. If the
+        argstr is None, the field is not part of the command.
+    position: int, optional
+        Position of the field in the command; can be a nonnegative or negative integer.
+        If nothing is provided the field will be inserted between all fields with
+        nonnegative positions and fields with negative positions.
+    sep: str, optional
+        A separator if a sequence type is provided as a value, by default " ".
+    container_path: bool, optional
+        If True a path will be considered as a path inside the container (and not as a
+        local path), by default it is False
+    formatter: function, optional
+        If provided the argstr of the field is created using the function. This function
+        can for example be used to combine several inputs into one command argument. The
+        function can take field (this input field will be passed to the function),
+        inputs (entire inputs will be passed) or any input field name (a specific input
+        field will be sent).
+    """
+
+    argstr: str | None = ""
+    position: int | None = None
+    sep: str = " "
+    allowed_values: list | None = None
+    container_path: bool = False  # IS THIS STILL USED??
+    formatter: ty.Callable | None = None
+
+
+@attrs.define(kw_only=True)
+class out(Out):
+    """An output field that specifies a command line argument
+
+    Parameters
+    ----------
+    callable : Callable, optional
+        If provided the output file name (or list of file names) is created using the
+        function. The function can take field (the specific output field will be passed
+        to the function), output_dir (task output_dir will be used), stdout, stderr
+        (stdout and stderr of the task will be sent), inputs (entire inputs will be
+        passed) or any input field name (a specific input field will be sent).
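+
+    Examples
+    --------
+    A minimal sketch (the field and function names are illustrative only) of an output
+    that is derived from the stdout of the command via a callable
+
+    ```
+    def first_line(stdout):
+        return stdout.splitlines()[0]
+
+    first_entry: str = shell.out(
+        help="the first line printed by the command",
+        callable=first_line,
+    )
+    ```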
+    """
+
+    callable: ty.Callable | None = attrs.field(default=None)
+
+    def __attrs_post_init__(self):
+        # Set type from return annotation of callable if not set
+        if self.type is ty.Any and self.callable:
+            self.type = ty.get_type_hints(self.callable).get("return", ty.Any)
+
+    @callable.validator
+    def _callable_validator(self, _, value):
+
+        if value:
+            if not callable(value):
+                raise ValueError(f"callable must be a function, not {value!r}")
+        elif (
+            self.default is NO_DEFAULT
+            and not getattr(self, "path_template", None)
+            and self.name
+            not in [
+                "return_code",
+                "stdout",
+                "stderr",
+            ]
+        ):  # ShellOutputs.BASE_NAMES
+            raise ValueError(
+                "A shell output field must have either a callable or a path_template"
+            )
+
+
+@attrs.define(kw_only=True)
+class outarg(arg, Out):
+    """An input field that specifies where to save the output file
+
+    Parameters
+    ----------
+    help: str
+        A short description of the input field.
+    default : Any, optional
+        the default value for the argument
+    mandatory: bool, optional
+        If True the user has to provide a value for the field, by default it is False
+    allowed_values: list, optional
+        List of allowed values for the field.
+    requires: list, optional
+        List of field names that are required together with the field.
+    copy_mode: File.CopyMode, optional
+        The mode of copying the file, by default it is File.CopyMode.any
+    copy_collation: File.CopyCollation, optional
+        The collation of the file, by default it is File.CopyCollation.any
+    copy_ext_decomp: File.ExtensionDecomposition, optional
+        The extension decomposition of the file, by default it is
+        File.ExtensionDecomposition.single
+    readonly: bool, optional
+        If True the input field can’t be provided by the user but it aggregates other
+        input fields (for example the fields with argstr: -o {fldA} {fldB}), by default
+        it is False
+    type: type, optional
+        The type of the field, by default it is Any
+    name: str, optional
+        The name of the field, used when specifying a list of fields instead of a mapping
+        from name to field, by default it is None
+    argstr: str, optional
+        A flag or string that is used in the command before the value, e.g. -v or
+        -v {inp_field}, but it could be an empty string, “”. If … are used, e.g. -v…,
+        the flag is used before every element if a list is provided as a value. If no
+        argstr is used the field is not part of the command.
+    position: int, optional
+        Position of the field in the command line; can be a nonnegative or negative integer.
+        If nothing is provided the field will be inserted between all fields with
+        nonnegative positions and fields with negative positions.
+    sep: str, optional
+        A separator if a list is provided as a value.
+    container_path: bool, optional
+        If True a path will be considered as a path inside the container (and not as a
+        local path), by default it is False
+    formatter: function, optional
+        If provided the argstr of the field is created using the function. This function
+        can for example be used to combine several inputs into one command argument. The
+        function can take field (this input field will be passed to the function),
+        inputs (entire inputs will be passed) or any input field name (a specific input
+        field will be sent).
+    path_template: str, optional
+        The template used to specify where the output file will be written to; it can
+        use other fields, e.g. {file1}. Used to create an output definition.
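+
+    Examples
+    --------
+    A minimal sketch (the field names and template are illustrative only) of an output
+    file whose default location is generated from a hypothetical `in_file` input via a
+    path template
+
+    ```
+    out_file: File = shell.outarg(
+        argstr="-o",
+        path_template="{in_file}_trimmed",
+        help="the trimmed output file",
+    )
+    ```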
+ """ + + path_template: str | None = attrs.field(default=None) + keep_extension: bool = attrs.field(default=True) + + @path_template.validator + def _validate_path_template(self, attribute, value): + if value: + if self.default not in (NO_DEFAULT, True, None): + raise ValueError( + f"path_template ({value!r}) can only be provided when there is no " + f"default value provided ({self.default!r})" + ) + if not (is_fileset_or_union(self.type) or self.type is ty.Any): + raise ValueError( + f"path_template ({value!r}) can only be provided when type is a FileSet, " + f"or union thereof, not {self.type!r}" + ) + # if self.argstr is None: + # raise ValueError( + # f"path_template ({value!r}) can only be provided when argstr is not None" + # ) + + # @keep_extension.validator + # def _validate_keep_extension(self, attribute, value): + # if value and self.path_template is None: + # raise ValueError( + # f"keep_extension ({value!r}) can only be provided when path_template " + # f"is provided" + # ) + + +@dataclass_transform( + kw_only_default=True, + field_specifiers=(out, outarg), +) +def outputs(wrapped): + """Decorator to specify the output fields of a shell command is a dataclass-style type""" + return wrapped + + +@dataclass_transform( + kw_only_default=True, + field_specifiers=(arg,), +) +def define( + wrapped: type | str | None = None, + /, + inputs: list[str | Arg] | dict[str, Arg | type] | None = None, + outputs: list[str | Out] | dict[str, Out | type] | type | None = None, + bases: ty.Sequence[type] = (), + outputs_bases: ty.Sequence[type] = (), + auto_attribs: bool = True, + name: str | None = None, + xor: ty.Sequence[str | None] | ty.Sequence[ty.Sequence[str | None]] = (), +) -> "ShellDef": + """Create a task definition for a shell command. Can be used either as a decorator on + the "canonical" dataclass-form of a task definition or as a function that takes a + "shell-command template string" of the form + + ``` + shell.define("command --output ") + ``` + + Fields are inferred from the template if not provided. In the template, inputs are + specified with `` and outputs with ``. + + ``` + my_command + ``` + + The types of the fields can be specified using their MIME like (see fileformats.core.from_mime), e.g. + + ``` + my_command + ``` + + The template can also specify options with `-` or `--` followed by the option name + and arguments with ``. The type is optional and will default to + `generic/fs-object` if not provided for arguments and `field/text` for + options. The file-formats namespace can be dropped for generic and field formats, e.g. + + ``` + another-command --output + ``` + + Parameters + ---------- + wrapped : type | str | None + The class or command line template to create an interface for + inputs : list[str | Arg] | dict[str, Arg | type] | None + The input fields of the shell command + outputs : list[str | Out] | dict[str, Out | type] | type | None + The output fields of the shell command + auto_attribs : bool + Whether to use auto_attribs mode when creating the class + args_last : bool + Whether to put the executable argument last in the command line instead of first + as they appear in the template + name: str | None + The name of the returned class + xor: Sequence[str | None] | Sequence[Sequence[str | None]], optional + Names of args that are exclusive mutually exclusive, which must include + the name of the current field. If this list includes None, then none of the + fields need to be set. 
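+    bases: Sequence[type], optional
+        Base classes for the generated task definition class, from which input fields
+        (including the executable) can be inherited, by default ()
+    outputs_bases: Sequence[type], optional
+        Base classes for the generated Outputs class, by default ()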
+ + Returns + ------- + ShellDef + The interface for the shell command + """ + from pydra.engine.specs import ShellDef, ShellOutputs + + def make( + wrapped: ty.Callable | type | None = None, + ) -> ShellDef: + + if inspect.isclass(wrapped): + klass = wrapped + executable: str + try: + executable = attrs.fields(klass).executable.default + except (AttributeError, attrs.exceptions.NotAnAttrsClassError): + try: + executable = klass.executable + except AttributeError: + raise AttributeError( + f"Shell task class {wrapped} must have an `executable` " + "attribute that specifies the command to run" + ) from None + if not isinstance(executable, str) and not ( + isinstance(executable, ty.Sequence) + and all(isinstance(e, str) for e in executable) + ): + raise ValueError( + "executable must be a string or a sequence of strings" + f", not {executable!r}" + ) + class_name = klass.__name__ + check_explicit_fields_are_none(klass, inputs, outputs) + parsed_inputs, parsed_outputs = extract_fields_from_class( + ShellDef, ShellOutputs, klass, arg, out, auto_attribs + ) + else: + if not isinstance(wrapped, (str, list)): + raise ValueError( + f"wrapped must be a class or a string, not {wrapped!r}" + ) + klass = None + input_helps, output_helps = {}, {} + + executable, inferred_inputs, inferred_outputs = parse_command_line_template( + wrapped, + inputs=inputs, + outputs=outputs, + ) + + parsed_inputs, parsed_outputs = ensure_field_objects( + arg_type=arg, + out_type=out, + inputs=inferred_inputs, + outputs=inferred_outputs, + input_helps=input_helps, + output_helps=output_helps, + ) + + if name: + class_name = name + else: + class_name = ( + "_".join(executable) if isinstance(executable, list) else executable + ) + class_name = re.sub(r"[^\w]", "_", class_name) + if class_name[0].isdigit(): + class_name = f"_{class_name}" + + # Add in fields from base classes + parsed_inputs.update({n: getattr(ShellDef, n) for n in ShellDef.BASE_NAMES}) + parsed_outputs.update( + {n: getattr(ShellOutputs, n) for n in ShellOutputs.BASE_NAMES} + ) + + # Update the inputs (overriding inputs from base classes) with the executable + # and the output argument fields + parsed_inputs.update( + {o.name: o for o in parsed_outputs.values() if isinstance(o, arg)} + ) + parsed_inputs["executable"] = arg( + name="executable", + type=str | ty.Sequence[str], + argstr="", + position=0, + default=executable, + validator=attrs.validators.min_len(1), + help=EXECUTABLE_HELP_STRING, + ) + + # Set positions for the remaining inputs that don't have an explicit position + position_stack = remaining_positions(list(parsed_inputs.values())) + for inpt in parsed_inputs.values(): + if inpt.name == "additional_args": + continue + if inpt.position is None: + inpt.position = position_stack.pop(0) + + # Convert string default values to callables that glob the files in the cwd + for outpt in parsed_outputs.values(): + if ( + isinstance(outpt, out) + and isinstance(outpt.default, str) + and TypeParser.contains_type(generic.FileSet, outpt.type) + ): + outpt.callable = GlobCallable(outpt.default) + outpt.default = NO_DEFAULT + + defn = make_task_def( + ShellDef, + ShellOutputs, + parsed_inputs, + parsed_outputs, + name=class_name, + klass=klass, + bases=bases, + outputs_bases=outputs_bases, + xor=xor, + ) + return defn + + # If a name is provided (and hence not being used as a decorator), check to see if + # we are extending from a class that already defines an executable + if wrapped is None and name is not None: + for base in bases: + try: + wrapped = 
attrs.fields(base).executable.default + except (AttributeError, attrs.exceptions.NotAnAttrsClassError): + try: + wrapped = base.executable + except AttributeError: + pass + if wrapped: + break + if wrapped is None: + raise ValueError( + f"name ({name!r}) can only be provided when creating a class " + "dynamically, i.e. not using it as a decorator. Check to see " + "whether you have forgotten to provide the command line template" + ) + # If wrapped is provided (i.e. this is not being used as a decorator), return the + # interface class + if wrapped is not None: + if not isinstance(wrapped, (type, str, list)): + raise ValueError( + f"wrapped must be a class, a string or a list, not {wrapped!r}" + ) + return make(wrapped) + return make + + +def parse_command_line_template( + template: str, + inputs: list[str | Arg] | dict[str, Arg | type] | None = None, + outputs: list[str | Out] | dict[str, Out | type] | None = None, +) -> ty.Tuple[str, dict[str, Arg | type], dict[str, Out | type]]: + """Parses a command line template into a name and input and output fields. Fields + are inferred from the template if not explicitly provided. + + In the template, inputs are specified with `` and outputs with ``. + The types of the fields can be specified using their MIME like (see fileformats.core.from_mime), e.g. + + ``` + my_command + ``` + + The template can also specify options with `-` or `--` + followed by the option name and arguments with ``. The type is optional and + will default to `generic/fs-object` if not provided for arguments and `field/text` for + options. The file-formats namespace can be dropped for generic and field formats, e.g. + + ``` + another-command --output + ``` + + Parameters + ---------- + template : str + The command line template + inputs : list[str | Arg] | dict[str, Arg | type] | None + The input fields of the shell command + outputs : list[str | Out] | dict[str, Out | type] | type | None + The output fields of the shell command + + Returns + ------- + executable : str + The name of the command line template + inputs : dict[str, Arg | type] + The input fields of the command line template + outputs : dict[str, Out | type] + The output fields of the command line template + + Raises + ------ + ValueError + If an unknown token is found in the command line template + TypeError + If an unknown type is found in the command line template + """ + if isinstance(inputs, list): + inputs = {arg.name: arg for arg in inputs} + elif isinstance(inputs, dict): + inputs = copy(inputs) # We don't want to modify the original + else: + assert inputs is None + inputs = {} + if isinstance(outputs, list): + outputs = {out.name: out for out in outputs} + elif isinstance(outputs, dict): + outputs = copy(outputs) # We don't want to modify the original + else: + assert outputs is None + outputs = {} + if isinstance(template, list): + tokens = template + else: + tokens = template.split() + executable = [] + start_args_index = 0 + for part in tokens: + if part.startswith("<") or part.startswith("-"): + break + executable.append(part) + start_args_index += 1 + if not executable: + raise ValueError(f"Found no executable in command line template: {template}") + if len(executable) == 1: + executable = executable[0] + tokens = tokens[start_args_index:] + if not tokens: + return executable, inputs, outputs + arg_pattern = r"<([:a-zA-Z0-9_,\|\-\.\/\+\*]+(?:\?|(?:=|\$)[^>]+)?)>" + opt_pattern = r"--?[a-zA-Z0-9_]+" + arg_re = re.compile(arg_pattern) + opt_re = re.compile(opt_pattern) + bool_arg_re = 
re.compile(f"({opt_pattern}){arg_pattern}") + + arguments = [] + option = None + + def add_arg(name, field_type, kwds): + """Merge the typing information with an existing field if it exists""" + if issubclass(field_type, Out): + dct = outputs + else: + dct = inputs + try: + field = dct.pop(name) + except KeyError: + field = field_type(name=name, **kwds) + else: + if isinstance(field, dict): + field = field_type(**field) + elif isinstance(field, type) or ty.get_origin(field): + kwds["type"] = field + field = field_type(name=name, **kwds) + elif not isinstance(field, field_type): # If field type is outarg not out + field = field_type(**attrs_values(field)) + field.name = name + type_ = kwds.pop("type", field.type) + if field.type is ty.Any: + field.type = type_ + for k, v in kwds.items(): + setattr(field, k, v) + dct[name] = field + if issubclass(field_type, Arg): + arguments.append(field) + + def from_type_str(type_str) -> type: + types = [] + for tp in type_str.split(","): + if "/" in tp: + type_ = from_mime(tp) + elif tp == "...": + type_ = "..." + else: + if tp in ("int", "float", "str", "bool"): + type_ = getattr(builtins, tp) + else: + try: + type_ = from_mime(f"generic/{tp}") + except FormatRecognitionError: + raise TypeError( + f"Found unknown type, {tp!r}, in command template: {template!r}" + ) from None + types.append(type_) + if len(types) == 2 and types[1] == "...": + type_ = tuple[types[0], ...] + elif len(types) > 1: + type_ = tuple[*types] + else: + type_ = types[0] + return type_ + + for token in tokens: + if match := arg_re.match(token): + name = match.group(1) + modify = False + if name.startswith("out|"): + name = name[4:] + field_type = outarg + elif name.startswith("modify|"): + name = name[7:] + field_type = arg + modify = True + else: + field_type = arg + # Identify type after ':' symbols + kwds = {} + is_multi = False + optional = False + if name.endswith("?"): + assert "=" not in name + name = name[:-1] + optional = True + kwds["default"] = None + elif name.endswith("+"): + is_multi = True + name = name[:-1] + elif name.endswith("*"): + is_multi = True + name = name[:-1] + kwds["default"] = attrs.Factory(list) + elif "=" in name: + name, default = name.split("=") + kwds["default"] = ( + default[1:-1] if re.match(r"('|\").*\1", default) else eval(default) + ) + elif "$" in name: + name, path_template = name.split("$") + kwds["path_template"] = path_template + if field_type is not outarg: + raise ValueError( + f"Path templates can only be used with output fields, not {token}" + ) + if ":" in name: + name, type_str = name.split(":") + type_ = from_type_str(type_str) + if ty.get_origin(type_) is tuple: + kwds["sep"] = " " + else: + type_ = generic.FsObject if option is None else str + if is_multi: + type_ = MultiInputObj[type_] + if optional: + type_ |= None # Make the arguments optional + kwds["type"] = type_ + if modify: + kwds["copy_mode"] = generic.File.CopyMode.copy + # Add field to outputs with the same name as the input + add_arg(name, out, {"type": type_, "callable": _InputPassThrough(name)}) + # If name contains a '.', treat it as a file template and strip it from the name + if field_type is outarg and "path_template" not in kwds: + path_template = name + if is_fileset_or_union(type_): + if ty.get_origin(type_): + ext_type = next(a for a in ty.get_args(type_) if a is not None) + else: + ext_type = type_ + if ext_type.ext is not None: + path_template = name + ext_type.ext + kwds["path_template"] = path_template + # Set the default value to None if the field is 
optional and no default is + # provided + if is_optional(type_) and "default" not in kwds: + kwds["default"] = None + if option is None: + add_arg(name, field_type, kwds) + else: + kwds["argstr"] = option + add_arg(name, field_type, kwds) + option = None + + elif match := bool_arg_re.match(token): + argstr, var = match.groups() + if "=" in var: + var, default = var.split("=") + default = eval(default) + else: + default = False + add_arg(var, arg, {"type": bool, "argstr": argstr, "default": default}) + elif match := opt_re.match(token): + option = token + else: + raise ValueError( + f"Found unknown token {token!r} in command line template: {template}" + ) + + remaining_pos = remaining_positions(arguments, len(arguments) + 1, 1) + + for argument in arguments: + if argument.position is None: + argument.position = remaining_pos.pop(0) + + return executable, inputs, outputs + + +def remaining_positions( + args: list[Arg], num_args: int | None = None, start: int = 0 +) -> ty.List[int]: + """Get the remaining positions for input fields + + Parameters + ---------- + args : list[Arg] + The list of input fields + num_args : int, optional + The number of arguments, by default it is the length of the args + + Returns + ------- + list[int] + The list of remaining positions + + Raises + ------ + ValueError + If multiple fields have the same position + """ + if num_args is None: + num_args = len(args) - 1 # Subtract 1 for the 'additional_args' field + # Check for multiple positions + positions = defaultdict(list) + for arg in args: + if arg.name == "additional_args": + continue + if arg.position is not None: + if arg.position >= 0: + positions[arg.position].append(arg) + else: + positions[num_args + arg.position].append(arg) + if multiple_positions := { + k: [f"{a.name}({a.position})" for a in v] + for k, v in positions.items() + if len(v) > 1 + }: + raise ValueError( + f"Multiple fields have the overlapping positions: {multiple_positions}" + ) + return [i for i in range(start, num_args) if i not in positions] + + +@attrs.define +class _InputPassThrough: + """A class that can be used to pass through an input to the output""" + + name: str + + def __call__(self, inputs: ShellDef) -> ty.Any: + return getattr(inputs, self.name) + + +class GlobCallable: + """Callable that can be used to glob files""" + + def __init__(self, pattern: str): + self.pattern = pattern + + def __call__(self) -> generic.FileSet: + matches = glob.glob(self.pattern) + if not matches: + raise FileNotFoundError(f"No files found matching pattern: {self.pattern}") + return matches diff --git a/pydra/design/tests/test_python.py b/pydra/design/tests/test_python.py new file mode 100644 index 0000000000..341183c308 --- /dev/null +++ b/pydra/design/tests/test_python.py @@ -0,0 +1,394 @@ +from operator import attrgetter +import typing as ty +from decimal import Decimal +import attrs +import pytest +from pydra.engine.helpers import list_fields +from pydra.engine.specs import PythonDef, PythonOutputs +from pydra.design import python + + +sort_key = attrgetter("name") + + +def test_interface_wrap_function(tmp_path): + def func(a: int) -> float: + """Sample function with inputs and outputs""" + return a * 2 + + SampleDef = python.define(func) + + assert issubclass(SampleDef, PythonDef) + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) + assert inputs == [ + python.arg(name="a", type=int), + python.arg(name="function", type=ty.Callable, hash_eq=True, default=func), + ] + assert 
outputs == [python.out(name="out", type=float)] + definition = SampleDef(a=1) + outputs = definition(cache_dir=tmp_path) + assert outputs.out == 2.0 + with pytest.raises(TypeError): + SampleDef(a=1.5) + + +def test_interface_wrap_function_with_default(): + def func(a: int, k: float = 2.0) -> float: + """Sample function with inputs and outputs""" + return a * k + + SampleDef = python.define(func) + + assert issubclass(SampleDef, PythonDef) + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) + assert inputs == [ + python.arg(name="a", type=int), + python.arg(name="function", type=ty.Callable, hash_eq=True, default=func), + python.arg(name="k", type=float, default=2.0), + ] + assert outputs == [python.out(name="out", type=float)] + assert SampleDef(a=1)().out == 2.0 + assert SampleDef(a=10, k=3.0)().out == 30.0 + + +def test_interface_wrap_function_overrides(): + def func(a: int) -> float: + """Sample function with inputs and outputs""" + return a * 2 + + SampleDef = python.define( + func, + inputs={"a": python.arg(help="The argument to be doubled")}, + outputs={"b": python.out(help="the doubled output", type=Decimal)}, + ) + + assert issubclass(SampleDef, PythonDef) + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) + assert inputs == [ + python.arg(name="a", type=int, help="The argument to be doubled"), + python.arg(name="function", type=ty.Callable, hash_eq=True, default=func), + ] + assert outputs == [ + python.out(name="b", type=Decimal, help="the doubled output"), + ] + outputs = SampleDef.Outputs(b=Decimal(2.0)) + assert isinstance(outputs.b, Decimal) + + +def test_interface_wrap_function_types(): + def func(a: int) -> int: + """Sample function with inputs and outputs""" + return a * 2 + + SampleDef = python.define( + func, + inputs={"a": float}, + outputs={"b": float}, + ) + + assert issubclass(SampleDef, PythonDef) + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) + assert inputs == [ + python.arg(name="a", type=float), + python.arg(name="function", type=ty.Callable, hash_eq=True, default=func), + ] + assert outputs == [python.out(name="b", type=float)] + intf = SampleDef(a=1) + assert isinstance(intf.a, float) + outputs = SampleDef.Outputs(b=2.0) + assert isinstance(outputs.b, float) + + +def test_decorated_function_interface(): + @python.define(outputs=["c", "d"]) + def SampleDef(a: int, b: float) -> tuple[float, float]: + """Sample function for testing""" + return a + b, a * b + + assert issubclass(SampleDef, PythonDef) + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) + assert inputs == [ + python.arg(name="a", type=int), + python.arg(name="b", type=float), + python.arg( + name="function", + type=ty.Callable, + hash_eq=True, + default=attrs.fields(SampleDef).function.default, + ), + ] + assert outputs == [ + python.out(name="c", type=float), + python.out(name="d", type=float), + ] + assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" + SampleDef.Outputs(c=1.0, d=2.0) + + +def test_interface_with_function_docstr(): + @python.define(outputs=["c", "d"]) + def SampleDef(a: int, b: float) -> tuple[float, float]: + """Sample function for testing + + :param a: First input to be inputted + :param b: Second input + :return c: Sum of a and b + :return d: product of a and b + """ + return a + b, 
a * b + + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) + assert inputs == [ + python.arg(name="a", type=int, help="First input to be inputted"), + python.arg(name="b", type=float, help="Second input"), + python.arg( + name="function", + type=ty.Callable, + hash_eq=True, + default=attrs.fields(SampleDef).function.default, + ), + ] + assert outputs == [ + python.out(name="c", type=float, help="Sum of a and b"), + python.out(name="d", type=float, help="product of a and b"), + ] + assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" + + +def test_interface_with_function_google_docstr(): + @python.define(outputs=["c", "d"]) + def SampleDef(a: int, b: float) -> tuple[float, float]: + """Sample function for testing + + Args: + a: First input + to be inputted + b: Second input + + Returns: + c: Sum of a and b + d: Product of a and b + """ + return a + b, a * b + + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) + assert inputs == [ + python.arg(name="a", type=int, help="First input to be inputted"), + python.arg(name="b", type=float, help="Second input"), + python.arg( + name="function", + type=ty.Callable, + hash_eq=True, + default=attrs.fields(SampleDef).function.default, + ), + ] + assert outputs == [ + python.out(name="c", type=float, help="Sum of a and b"), + python.out(name="d", type=float, help="Product of a and b"), + ] + assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" + + +def test_interface_with_function_numpy_docstr(): + @python.define( + outputs=["c", "d"] + ) # Could potentiall read output names from doc-string instead + def SampleDef(a: int, b: float) -> tuple[float, float]: + """Sample function for testing + + Parameters + ---------- + a: int + First input + to be inputted + b: float + Second input + + Returns + ------- + c : int + Sum of a and b + d : float + Product of a and b + """ + return a + b, a * b + + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) + assert inputs == [ + python.arg(name="a", type=int, help="First input to be inputted"), + python.arg(name="b", type=float, help="Second input"), + python.arg( + name="function", + type=ty.Callable, + hash_eq=True, + default=attrs.fields(SampleDef).function.default, + ), + ] + assert outputs == [ + python.out(name="c", type=float, help="Sum of a and b"), + python.out(name="d", type=float, help="Product of a and b"), + ] + assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" + + +def test_interface_with_class(): + @python.define + class SampleDef(PythonDef["SampleDef.Outputs"]): + """Sample class for testing + + Args: + a: First input + to be inputted + b: Second input + """ + + a: int + b: float = 2.0 + + class Outputs(PythonOutputs): + """ + Args: + c: Sum of a and b + d: Product of a and b + """ + + c: float + d: float + + @staticmethod + def function(a, b): + return a + b, a * b + + assert issubclass(SampleDef, PythonDef) + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) + assert inputs == [ + python.arg(name="a", type=int, help="First input to be inputted"), + python.arg(name="b", type=float, default=2.0, help="Second input"), + python.arg( + name="function", + type=ty.Callable, + hash_eq=True, + default=attrs.fields(SampleDef).function.default, + ), + ] + assert outputs == [ + 
python.out(name="c", type=float, help="Sum of a and b"), + python.out(name="d", type=float, help="Product of a and b"), + ] + assert SampleDef.function.__name__ == "function" + SampleDef(a=1) + SampleDef(a=1, b=2.0) + SampleDef.Outputs(c=1.0, d=2.0) + + +def test_interface_with_inheritance(): + @python.define + class SampleDef(PythonDef["SampleDef.Outputs"]): + """Sample class for testing + + Args: + a: First input + to be inputted + b: Second input + """ + + a: int + b: float + + class Outputs(PythonOutputs): + """ + Args: + c: Sum of a and b + d: Product of a and b + """ + + c: float + d: float + + @staticmethod + def function(a, b): + return a + b, a * b + + assert issubclass(SampleDef, PythonDef) + + +def test_interface_with_class_no_auto_attribs(): + @python.define(auto_attribs=False) + class SampleDef(PythonDef["SampleDef.Outputs"]): + a: int = python.arg(help="First input to be inputted") + b: float = python.arg(help="Second input") + + x: int + + class Outputs(PythonOutputs): + c: float = python.out(help="Sum of a and b") + d: float = python.out(help="Product of a and b") + + y: str + + @staticmethod + def function(a, b): + return a + b, a * b + + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) + assert inputs == [ + python.arg(name="a", type=int, help="First input to be inputted"), + python.arg(name="b", type=float, help="Second input"), + python.arg( + name="function", + type=ty.Callable, + hash_eq=True, + default=attrs.fields(SampleDef).function.default, + ), + ] + assert outputs == [ + python.out(name="c", type=float, help="Sum of a and b"), + python.out(name="d", type=float, help="Product of a and b"), + ] + assert SampleDef.function.__name__ == "function" + SampleDef(a=1, b=2.0) + SampleDef.Outputs(c=1.0, d=2.0) + with pytest.raises(TypeError): + SampleDef(a=1, b=2.0, x=3) + with pytest.raises(TypeError): + SampleDef.Outputs(c=1.0, d=2.0, y="hello") + + +def test_interface_invalid_wrapped1(): + with pytest.raises(ValueError): + + @python.define(inputs={"a": python.arg()}) + class SampleDef(PythonDef["SampleDef.Outputs"]): + a: int + + class Outputs: + b: float + + @staticmethod + def function(a): + return a + 1 + + +def test_interface_invalid_wrapped2(): + with pytest.raises(ValueError): + + @python.define(outputs={"b": python.out()}) + class SampleDef(PythonDef["SampleDef.Outputs"]): + a: int + + class Outputs: + b: float + + @staticmethod + def function(a): + return a + 1 diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py new file mode 100644 index 0000000000..7a3a1896fb --- /dev/null +++ b/pydra/design/tests/test_shell.py @@ -0,0 +1,1006 @@ +import os +import typing as ty +from pathlib import Path +import attrs +import pytest +import cloudpickle as cp +from pydra.design import shell +from pydra.engine.helpers import list_fields +from pydra.engine.specs import ( + ShellDef, + ShellOutputs, + RETURN_CODE_HELP, + STDOUT_HELP, + STDERR_HELP, +) +from fileformats.generic import File, Directory, FsObject +from fileformats import text, image +from pydra.utils.typing import MultiInputObj + + +def test_interface_template(): + + Cp = shell.define("cp ") + + assert issubclass(Cp, ShellDef) + output = shell.outarg( + name="out_path", + path_template="out_path", + type=FsObject, + position=2, + ) + assert sorted_fields(Cp) == [ + shell.arg( + name="executable", + validator=attrs.validators.min_len(1), + default="cp", + type=str | ty.Sequence[str], + position=0, + 
help=shell.EXECUTABLE_HELP_STRING, + ), + shell.arg(name="in_path", type=FsObject, position=1), + output, + ShellDef.additional_args, + ] + assert sorted_fields(Cp.Outputs) == [ + output, + shell.out( + name="return_code", + type=int, + help=RETURN_CODE_HELP, + ), + shell.out( + name="stderr", + type=str, + help=STDERR_HELP, + ), + shell.out( + name="stdout", + type=str, + help=STDOUT_HELP, + ), + ] + intf = Cp(in_path=File.mock("in-path.txt")) + assert intf.executable == "cp" + Cp(in_path=File.mock("in-path.txt"), out_path=Path("./out-path.txt")) + Cp.Outputs(out_path=File.mock("in-path.txt")) + + +def test_interface_template_w_types_and_path_template_ext(): + + TrimPng = shell.define("trim-png ") + + assert issubclass(TrimPng, ShellDef) + output = shell.outarg( + name="out_image", + path_template="out_image.png", + type=image.Png, + position=2, + ) + assert sorted_fields(TrimPng) == [ + shell.arg( + name="executable", + validator=attrs.validators.min_len(1), + default="trim-png", + type=str | ty.Sequence[str], + position=0, + help=shell.EXECUTABLE_HELP_STRING, + ), + shell.arg(name="in_image", type=image.Png, position=1), + output, + ShellDef.additional_args, + ] + assert sorted_fields(TrimPng.Outputs) == [ + output, + shell.out( + name="return_code", + type=int, + help=RETURN_CODE_HELP, + ), + shell.out( + name="stderr", + type=str, + help=STDERR_HELP, + ), + shell.out( + name="stdout", + type=str, + help=STDOUT_HELP, + ), + ] + TrimPng(in_image=image.Png.mock()) + TrimPng(in_image=image.Png.mock(), out_image=Path("./new_image.png")) + TrimPng.Outputs(out_image=image.Png.mock()) + + +def test_interface_template_w_modify(): + + TrimPng = shell.define("trim-png ") + + assert issubclass(TrimPng, ShellDef) + assert sorted_fields(TrimPng) == [ + shell.arg( + name="executable", + validator=attrs.validators.min_len(1), + default="trim-png", + type=str | ty.Sequence[str], + position=0, + help=shell.EXECUTABLE_HELP_STRING, + ), + shell.arg( + name="image", type=image.Png, position=1, copy_mode=File.CopyMode.copy + ), + ShellDef.additional_args, + ] + assert sorted_fields(TrimPng.Outputs) == [ + shell.out( + name="image", + type=image.Png, + callable=shell._InputPassThrough("image"), + ), + shell.out( + name="return_code", + type=int, + help=RETURN_CODE_HELP, + ), + shell.out( + name="stderr", + type=str, + help=STDERR_HELP, + ), + shell.out( + name="stdout", + type=str, + help=STDOUT_HELP, + ), + ] + TrimPng(image=image.Png.mock()) + TrimPng.Outputs(image=image.Png.mock()) + + +def test_interface_template_more_complex(): + + Cp = shell.define( + ( + "cp " + "-R " + "--text-arg " + "--int-arg " + "--tuple-arg " + ), + ) + + assert issubclass(Cp, ShellDef) + output = shell.outarg( + name="out_dir", + type=Directory, + path_template="out_dir", + position=2, + ) + assert sorted_fields(Cp) == [ + shell.arg( + name="executable", + validator=attrs.validators.min_len(1), + default="cp", + type=str | ty.Sequence[str], + position=0, + help=shell.EXECUTABLE_HELP_STRING, + ), + shell.arg( + name="in_fs_objects", + type=MultiInputObj[FsObject], + position=1, + ), + output, + shell.arg(name="recursive", argstr="-R", type=bool, default=False, position=3), + shell.arg( + name="text_arg", + argstr="--text-arg", + type=str | None, + default=None, + position=4, + ), + shell.arg( + name="int_arg", + argstr="--int-arg", + type=int | None, + default=None, + position=5, + ), + shell.arg( + name="tuple_arg", + argstr="--tuple-arg", + type=tuple[int, str] | None, + sep=" ", + default=None, + position=6, + ), + 
ShellDef.additional_args, + ] + assert sorted_fields(Cp.Outputs) == [ + output, + shell.out( + name="return_code", + type=int, + help=RETURN_CODE_HELP, + ), + shell.out( + name="stderr", + type=str, + help=STDERR_HELP, + ), + shell.out( + name="stdout", + type=str, + help=STDOUT_HELP, + ), + ] + Cp(in_fs_objects=[File.sample(), File.sample(seed=1)]) + Cp.Outputs(out_dir=Directory.sample()) + + +def test_interface_template_with_overrides_and_optionals(): + + RECURSIVE_HELP = ( + "If source_file designates a directory, cp copies the directory and the entire " + "subtree connected at that point." + ) + + Cp = shell.define( + ( + "cp " + "-R " + "--text-arg " + "--int-arg " + "--tuple-arg " + ), + inputs={"recursive": shell.arg(help=RECURSIVE_HELP)}, + outputs={ + "out_dir": shell.outarg(position=-2), + "out_file": shell.outarg(position=-1), + }, + ) + + assert issubclass(Cp, ShellDef) + outargs = [ + shell.outarg( + name="out_dir", + type=Directory, + path_template="out_dir", + position=-2, + ), + shell.outarg( + name="out_file", + type=File | None, + default=None, + path_template="out_file", + position=-1, + ), + ] + assert sorted_fields(Cp) == [ + shell.arg( + name="executable", + validator=attrs.validators.min_len(1), + default="cp", + type=str | ty.Sequence[str], + position=0, + help=shell.EXECUTABLE_HELP_STRING, + ), + shell.arg(name="in_fs_objects", type=MultiInputObj[FsObject], position=1), + shell.arg( + name="recursive", + argstr="-R", + type=bool, + default=False, + help=RECURSIVE_HELP, + position=2, + ), + shell.arg(name="text_arg", argstr="--text-arg", type=str, position=3), + shell.arg( + name="int_arg", + argstr="--int-arg", + type=int | None, + default=None, + position=4, + ), + shell.arg( + name="tuple_arg", + argstr="--tuple-arg", + type=tuple[int, str], + sep=" ", + position=5, + ), + ] + outargs + [ShellDef.additional_args] + assert sorted_fields(Cp.Outputs) == outargs + [ + shell.out( + name="return_code", + type=int, + help=RETURN_CODE_HELP, + ), + shell.out( + name="stderr", + type=str, + help=STDERR_HELP, + ), + shell.out( + name="stdout", + type=str, + help=STDOUT_HELP, + ), + ] + + +def test_interface_template_with_defaults(): + + Cp = shell.define( + ( + "cp " + "-R " + "--text-arg " + "--int-arg " + "--tuple-arg " + ), + ) + + assert issubclass(Cp, ShellDef) + output = shell.outarg( + name="out_dir", + type=Directory, + path_template="out_dir", + position=2, + ) + assert sorted_fields(Cp) == [ + shell.arg( + name="executable", + validator=attrs.validators.min_len(1), + default="cp", + type=str | ty.Sequence[str], + position=0, + help=shell.EXECUTABLE_HELP_STRING, + ), + shell.arg(name="in_fs_objects", type=MultiInputObj[FsObject], position=1), + output, + shell.arg(name="recursive", argstr="-R", type=bool, default=True, position=3), + shell.arg( + name="text_arg", argstr="--text-arg", type=str, position=4, default="foo" + ), + shell.arg(name="int_arg", argstr="--int-arg", type=int, position=5, default=99), + shell.arg( + name="tuple_arg", + argstr="--tuple-arg", + type=tuple[int, str], + default=(1, "bar"), + position=6, + sep=" ", + ), + ShellDef.additional_args, + ] + assert sorted_fields(Cp.Outputs) == [ + output, + shell.out( + name="return_code", + type=int, + help=RETURN_CODE_HELP, + ), + shell.out( + name="stderr", + type=str, + help=STDERR_HELP, + ), + shell.out( + name="stdout", + type=str, + help=STDOUT_HELP, + ), + ] + Cp(in_fs_objects=[File.sample(), File.sample(seed=1)]) + Cp.Outputs(out_dir=Directory.sample()) + + +def 
test_interface_template_with_type_overrides(): + + Cp = shell.define( + ( + "cp " + "-R " + "--text-arg " + "--int-arg " + "--tuple-arg " + ), + inputs={"text_arg": str, "int_arg": int | None}, + ) + + assert issubclass(Cp, ShellDef) + output = shell.outarg( + name="out_dir", + type=Directory, + path_template="out_dir", + position=2, + ) + assert sorted_fields(Cp) == [ + shell.arg( + name="executable", + validator=attrs.validators.min_len(1), + default="cp", + type=str | ty.Sequence[str], + position=0, + help=shell.EXECUTABLE_HELP_STRING, + ), + shell.arg(name="in_fs_objects", type=MultiInputObj[FsObject], position=1), + output, + shell.arg(name="recursive", argstr="-R", type=bool, default=False, position=3), + shell.arg(name="text_arg", argstr="--text-arg", type=str, position=4), + shell.arg( + name="int_arg", + argstr="--int-arg", + type=int | None, + position=5, + ), + shell.arg( + name="tuple_arg", + argstr="--tuple-arg", + type=tuple[int, str], + position=6, + sep=" ", + ), + ShellDef.additional_args, + ] + assert sorted_fields(Cp.Outputs) == [ + output, + shell.out( + name="return_code", + type=int, + help=RETURN_CODE_HELP, + ), + shell.out( + name="stderr", + type=str, + help=STDERR_HELP, + ), + shell.out( + name="stdout", + type=str, + help=STDOUT_HELP, + ), + ] + + +@pytest.fixture(params=["static", "dynamic"]) +def Ls(request): + if request.param == "static": + + @shell.define(xor=["complete_date", "date_format_str", None]) + class Ls(ShellDef["Ls.Outputs"]): + executable = "ls" + + directory: Directory = shell.arg( + help="the directory to list the contents of", + argstr="", + position=-1, + ) + hidden: bool = shell.arg( + help=("display hidden FS objects"), + argstr="-a", + default=False, + ) + long_format: bool = shell.arg( + help=( + "display properties of FS object, such as permissions, size and " + "timestamps " + ), + default=False, + argstr="-l", + ) + human_readable: bool = shell.arg( + help="display file sizes in human readable form", + argstr="-h", + default=False, + requires=["long_format"], + ) + complete_date: bool = shell.arg( + help="Show complete date in long format", + argstr="-T", + default=False, + requires=["long_format"], + ) + date_format_str: str | None = shell.arg( + help="format string for ", + argstr="-D", + default=None, + requires=["long_format"], + ) + + @shell.outputs + class Outputs(ShellOutputs): + entries: list = shell.out( + help="list of entries returned by ls command", + callable=list_entries, + ) + + elif request.param == "dynamic": + Ls = shell.define( + "ls", + inputs={ + "directory": shell.arg( + type=Directory, + help="the directory to list the contents of", + argstr="", + position=-1, + ), + "hidden": shell.arg( + type=bool, + help="display hidden FS objects", + default=False, + argstr="-a", + ), + "long_format": { # Mix it up with a full dictionary based definition + "type": bool, + "default": False, + "help": ( + "display properties of FS object, such as permissions, size and " + "timestamps " + ), + "argstr": "-l", + }, + "human_readable": shell.arg( + type=bool, + help="display file sizes in human readable form", + default=False, + argstr="-h", + requires=["long_format"], + ), + "complete_date": shell.arg( + type=bool, + help="Show complete date in long format", + argstr="-T", + default=False, + requires=["long_format"], + ), + "date_format_str": shell.arg( + type=str | None, + help="format string for ", + default=None, + argstr="-D", + requires=["long_format"], + ), + }, + outputs={ + "entries": shell.out( + type=list, + help="list 
of entries returned by ls command", + callable=list_entries, + ) + }, + xor=["complete_date", "date_format_str", None], + name="Ls", + ) + + else: + assert False + + return Ls + + +def test_shell_fields(Ls): + assert sorted([a.name for a in sorted_fields(Ls)]) == sorted( + [ + "additional_args", + "executable", + "directory", + "hidden", + "long_format", + "human_readable", + "complete_date", + "date_format_str", + ] + ) + + assert [a.name for a in sorted_fields(Ls.Outputs)] == sorted( + [ + "entries", + "stdout", + "stderr", + "return_code", + ] + ) + + +def test_shell_pickle_roundtrip(Ls, tmp_path): + pkl_file = tmp_path / "ls.pkl" + with open(pkl_file, "wb") as f: + cp.dump(Ls, f) + + with open(pkl_file, "rb") as f: + RereadLs = cp.load(f) + + assert RereadLs is Ls + + +# @pytest.mark.xfail(reason="Still need to update tasks to use new shell interface") +def test_shell_run(Ls, tmp_path): + Path.touch(tmp_path / "a") + Path.touch(tmp_path / "b") + Path.touch(tmp_path / "c") + + ls = Ls(directory=tmp_path, long_format=True) + + # Test cmdline + assert ls.directory == Directory(tmp_path) + assert not ls.hidden + assert ls.long_format + assert ls.cmdline == f"ls -l {tmp_path}" + + # Drop Long format flag to make output simpler + ls = Ls(directory=tmp_path) + outputs = ls() + + assert sorted(outputs.entries) == ["a", "b", "c"] + + +@pytest.fixture(params=["static", "dynamic"]) +def A(request): + if request.param == "static": + + @shell.define + class A(ShellDef["A.Outputs"]): + """An example shell interface described in a class + + Parameters + ---------- + x : File + an input file + """ + + executable = "cp" + + x: File = shell.arg(argstr="", position=1) + + class Outputs(ShellOutputs): + """The outputs of the example shell interface + + Parameters + ---------- + y : File + path of output file""" + + y: File = shell.outarg(path_template="{x}_out", position=-1) + + elif request.param == "dynamic": + A = shell.define( + "cp", + inputs={ + "x": shell.arg( + type=File, + help="an input file", + argstr="", + position=1, + ), + }, + outputs={ + "y": shell.outarg( + type=File, + help="path of output file", + argstr="", + path_template="{x}_out", + ), + }, + name="A", + ) + else: + assert False + + return A + + +def test_shell_output_path_template(A): + assert "y" in [a.name for a in attrs.fields(A.Outputs)] + + +def test_shell_output_field_name_static(): + @shell.define + class A(ShellDef["A.Outputs"]): + """Copy a file""" + + executable = "cp" + + x: File = shell.arg(help="an input file", argstr="", position=1) + + class Outputs(ShellOutputs): + y: File = shell.outarg( + help="the output file", + path_template="{x}_out", + argstr="", + position=-1, + ) + + assert sorted([a.name for a in attrs.fields(A) if not a.name.startswith("_")]) == [ + "additional_args", + "executable", + "x", + "y", + ] + assert sorted(a.name for a in attrs.fields(A.Outputs)) == [ + "return_code", + "stderr", + "stdout", + "y", + ] + output = shell.outarg( + name="y", + type=File, + help="the output file", + path_template="{x}_out", + argstr="", + position=-1, + ) + assert sorted_fields(A) == [ + shell.arg( + name="executable", + validator=attrs.validators.min_len(1), + default="cp", + type=str | ty.Sequence[str], + argstr="", + position=0, + help=shell.EXECUTABLE_HELP_STRING, + ), + shell.arg( + name="x", + type=File, + help="an input file", + argstr="", + position=1, + ), + output, + ShellDef.additional_args, + ] + assert sorted_fields(A.Outputs) == [ + output, + shell.out( + name="return_code", + type=int, + 
help=RETURN_CODE_HELP, + ), + shell.out( + name="stderr", + type=str, + help=STDERR_HELP, + ), + shell.out( + name="stdout", + type=str, + help=STDOUT_HELP, + ), + ] + + +def test_shell_output_field_name_dynamic(): + A = shell.define( + "cp", + name="A", + inputs={ + "x": shell.arg( + type=File, + help="an input file", + argstr="", + position=1, + ), + }, + outputs={ + "y": shell.outarg( + type=File, + help="path of output file", + argstr="", + path_template="{x}_out", + ), + }, + ) + + assert "y" in [a.name for a in attrs.fields(A.Outputs)] + + +def get_file_size(y: Path): + result = os.stat(y) + return result.st_size + + +def test_shell_bases_dynamic(A, tmp_path): + B = shell.define( + name="B", + inputs={"y": shell.arg(type=File, help="output file", argstr="", position=-1)}, + outputs={ + "out_file_size": { + "type": int, + "help": "size of the output directory", + "callable": get_file_size, + } + }, + bases=[A], + ) + + xpath = tmp_path / "x.txt" + ypath = tmp_path / "y.txt" + Path.touch(xpath) + Path.touch(ypath) + + b = B(x=xpath, y=ypath) + + assert b.x == File(xpath) + assert b.y == File(ypath) + + # outputs = b() + # assert outputs.y == str(ypath) + + +def test_shell_bases_static(A, tmp_path): + @shell.define + class B(A): + + y: text.Plain = shell.arg() # Override the output arg in A + + class Outputs(ShellOutputs): + """ + Args: + out_file_size: size of the output directory + """ + + out_file_size: int = shell.out(callable=get_file_size) + + xpath = tmp_path / "x.txt" + ypath = tmp_path / "y.txt" + Path.touch(xpath) + ypath.write_text("Hello, World!") + + a = A(x=xpath, y=ypath) + assert a.x == File(xpath) + assert a.y == ypath + + b = B(x=xpath, y=str(ypath)) + assert b.x == File(xpath) + # We have overridden the type of y from an output arg with a path_template so it + # gets coerced to a text.Plain object + assert b.y == text.Plain(ypath) + + # outputs = b() + # assert outputs.y == str(ypath) + + +def test_shell_inputs_outputs_bases_dynamic(tmp_path): + A = shell.define( + "ls", + name="A", + inputs={ + "directory": shell.arg( + type=Directory, + help="input directory", + argstr="", + position=-1, + ) + }, + outputs={ + "entries": shell.out( + type=list, + help="list of entries returned by ls command", + callable=list_entries, + ) + }, + ) + B = shell.define( + "ls", + name="B", + inputs={ + "hidden": shell.arg( + type=bool, + argstr="-a", + help="show hidden files", + default=False, + ) + }, + bases=[A], + ) + + b = B(directory=tmp_path, hidden=True) + + assert b.directory == Directory(tmp_path) + assert b.hidden + + # File.sample(tmp_path, stem=".hidden-file") + # outputs = b() + # assert result.runner.cmdline == f"ls -a {tmp_path}" + # assert outputs.entries == [".", "..", ".hidden-file"] + + +def test_shell_inputs_outputs_bases_static(tmp_path): + @shell.define + class A(ShellDef["A.Outputs"]): + executable = "ls" + + directory: Directory = shell.arg(help="input directory", argstr="", position=-1) + + class Outputs(ShellOutputs): + entries: list = shell.out( + help="list of entries returned by ls command", + callable=list_entries, + ) + + @shell.define + class B(A): + hidden: bool = shell.arg( + help="show hidden files", + argstr="-a", + default=False, + ) + + Path.touch(tmp_path / ".hidden") + + b = B(directory=tmp_path, hidden=True) + + assert b.directory == Directory(tmp_path) + assert b.hidden + + # outputs = b() + # assert outputs.entries == [".", "..", ".hidden"] + + +def test_shell_missing_executable_static(): + with pytest.raises(AttributeError, match="must have 
an `executable` attribute"): + + @shell.define + class A: + directory: Directory = shell.arg( + help="input directory", argstr="", position=-1 + ) + + class Outputs: + entries: list = shell.out( + help="list of entries returned by ls command", + callable=list_entries, + ) + + +def test_shell_missing_executable_dynamic(): + with pytest.raises( + ValueError, + match=r"name \('A'\) can only be provided when creating a class dynamically", + ): + shell.define( + name="A", + inputs={ + "directory": shell.arg( + type=Directory, + help="input directory", + argstr="", + position=-1, + ), + }, + outputs={ + "entries": shell.out( + type=list, + help="list of entries returned by ls command", + callable=list_entries, + ) + }, + ) + + +def list_entries(stdout): + return stdout.split("\n")[:-1] + + +def sorted_fields(interface): + fields = list_fields(interface) + length = len(fields) - 1 + + def pos_key(out: shell.out) -> int: + if out.name == "additional_args": + return (length + 1, out.name) + try: + pos = out.position + except AttributeError: + return (length, out.name) + if pos < 0: + key = length + pos + else: + key = pos + return (key, out.name) + + return sorted(fields, key=pos_key) diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py new file mode 100644 index 0000000000..090182bc42 --- /dev/null +++ b/pydra/design/tests/test_workflow.py @@ -0,0 +1,493 @@ +from operator import attrgetter +from copy import copy +from unittest.mock import Mock +import attrs +from pydra.engine.lazy import LazyInField, LazyOutField +import typing as ty +from pydra.design import shell, python, workflow +from pydra.engine.helpers import list_fields +from pydra.engine.specs import WorkflowDef, WorkflowOutputs +from pydra.engine.core import Workflow +from pydra.utils.hash import hash_function +from fileformats import video, image + +# NB: We use PascalCase for interfaces and workflow functions as it is translated into a class + + +@python.define +def Add(a, b): + return a + b + + +@python.define +def Mul(a, b): + return a * b + + +@python.define(outputs=["divided"]) +def Divide(x, y): + return x / y + + +@python.define +def Sum(x: list[float]) -> float: + return sum(x) + + +def a_converter(value): + if value is attrs.NOTHING: + return value + return float(value) + + +def test_workflow(): + + @workflow.define + def MyTestWorkflow(a, b): + add = workflow.add(Add(a=a, b=b)) + mul = workflow.add(Mul(a=add.out, b=b)) + return mul.out + + constructor = MyTestWorkflow().constructor + assert constructor.__name__ == "MyTestWorkflow" + + # The constructor function is included as a part of the definition so it is + # included in the hash by default and can be overridden if needed. 
Not 100% sure + # if this is a good idea or not + assert list_fields(MyTestWorkflow) == [ + workflow.arg(name="a"), + workflow.arg(name="b"), + workflow.arg( + name="constructor", type=ty.Callable, hash_eq=True, default=constructor + ), + ] + assert list_fields(MyTestWorkflow.Outputs) == [ + workflow.out(name="out"), + ] + workflow_spec = MyTestWorkflow(a=1, b=2.0) + wf = Workflow.construct(workflow_spec) + assert wf.inputs.a == 1 + assert wf.inputs.b == 2.0 + assert wf.outputs.out == LazyOutField(node=wf["Mul"], field="out", type=ty.Any) + + # Nodes are named after the specs by default + assert list(wf.node_names) == ["Add", "Mul"] + + +def test_shell_workflow(): + + @workflow.define(outputs=["output_video"]) + def MyTestShellWorkflow( + input_video: video.Mp4, + watermark: image.Png, + watermark_dims: tuple[int, int] = (10, 10), + ) -> video.Mp4: + + add_watermark = workflow.add( + shell.define( + "ffmpeg -i -i " + "-filter_complex " + )( + in_video=input_video, + watermark=watermark, + filter="overlay={}:{}".format(*watermark_dims), + ), + name="add_watermark", + ) + output_video = workflow.add( + shell.define( + "HandBrakeCLI -i -o " + "--width --height ", + )(in_video=add_watermark.out_video, width=1280, height=720), + name="resize", + ).out_video + + return output_video + + constructor = MyTestShellWorkflow().constructor + assert constructor.__name__ == "MyTestShellWorkflow" + assert list_fields(MyTestShellWorkflow) == [ + workflow.arg(name="input_video", type=video.Mp4), + workflow.arg(name="watermark", type=image.Png), + workflow.arg(name="watermark_dims", type=tuple[int, int], default=(10, 10)), + workflow.arg( + name="constructor", type=ty.Callable, hash_eq=True, default=constructor + ), + ] + assert list_fields(MyTestShellWorkflow.Outputs) == [ + workflow.out(name="output_video", type=video.Mp4), + ] + input_video = video.Mp4.mock("input.mp4") + watermark = image.Png.mock("watermark.png") + workflow_spec = MyTestShellWorkflow( + input_video=input_video, + watermark=watermark, + ) + wf = Workflow.construct(workflow_spec) + assert wf.inputs.input_video == input_video + assert wf.inputs.watermark == watermark + assert wf.outputs.output_video == LazyOutField( + node=wf["resize"], field="out_video", type=video.Mp4, type_checked=True + ) + assert list(wf.node_names) == ["add_watermark", "resize"] + + +def test_workflow_canonical(): + """Test class-based workflow definition""" + + # NB: We use PascalCase (i.e. class names) as it is translated into a class + + @workflow.define + class MyTestWorkflow(WorkflowDef["MyTestWorkflow.Outputs"]): + + a: int + b: float = workflow.arg( + help="A float input", + converter=a_converter, + ) + + @staticmethod + def constructor(a, b): + add = workflow.add(Add(a=a, b=b)) + mul = workflow.add(Mul(a=add.out, b=b)) + return mul.out + + @workflow.outputs + class Outputs(WorkflowOutputs): + out: float + + constructor = MyTestWorkflow().constructor + assert constructor.__name__ == "constructor" + + # The constructor function is included as a part of the definition so it is + # included in the hash by default and can be overridden if needed. 
Not 100% sure + # if this is a good idea or not + assert sorted(list_fields(MyTestWorkflow), key=attrgetter("name")) == [ + workflow.arg(name="a", type=int), + workflow.arg(name="b", type=float, help="A float input", converter=a_converter), + workflow.arg( + name="constructor", type=ty.Callable, hash_eq=True, default=constructor + ), + ] + assert list_fields(MyTestWorkflow.Outputs) == [ + workflow.out(name="out", type=float), + ] + workflow_spec = MyTestWorkflow(a=1, b=2.0) + wf = Workflow.construct(workflow_spec) + assert wf.inputs.a == 1 + assert wf.inputs.b == 2.0 + assert wf.outputs.out == LazyOutField(node=wf["Mul"], field="out", type=ty.Any) + + # Nodes are named after the specs by default + assert list(wf.node_names) == ["Add", "Mul"] + + +def test_workflow_lazy(): + + @workflow.define + def MyTestShellWorkflow( + input_video: video.Mp4, + watermark: image.Png, + watermark_dims: tuple[int, int] = (10, 10), + ) -> video.Mp4: + + add_watermark = workflow.add( + shell.define( + "ffmpeg -i -i " + "-filter_complex " + )( + in_video=input_video, + watermark=watermark, + filter="overlay={}:{}".format(*watermark_dims), + ), + name="add_watermark", + ) + output_video = workflow.add( + shell.define( + "HandBrakeCLI -i -o " + "--width --height ", + # By default any input/output specified with a flag (e.g. -i ) + # is considered optional, i.e. of type `FsObject | None`, and therefore + # won't be used by default. By overriding this with non-optional types, + # the fields are specified as being required. + inputs={"in_video": video.Mp4}, + outputs={"out_video": video.Mp4}, + )(in_video=add_watermark.out_video, width=1280, height=720), + name="resize", + ).out_video + + return output_video # test implicit detection of output name + + # input_video = video.Mp4.mock("input.mp4") + # watermark = image.Png.mock("watermark.png") + mock_node = Mock() + mock_node.name = "mock_node" + workflow_spec = MyTestShellWorkflow( + input_video=LazyOutField(node=mock_node, field="a_video", type=video.Mp4), + watermark=LazyOutField(node=mock_node, field="a_watermark", type=image.Png), + ) + Workflow.clear_cache(definition=MyTestShellWorkflow) + wf = Workflow.construct(workflow_spec) + assert wf["add_watermark"].inputs.in_video == LazyInField( + workflow=wf, field="input_video", type=video.Mp4, type_checked=True + ) + assert wf["add_watermark"].inputs.watermark == LazyInField( + workflow=wf, field="watermark", type=image.Png, type_checked=True + ) + + # Check to see that the cache is populated with the new workflow + workflow_cache = Workflow._constructed_cache[hash_function(MyTestShellWorkflow)] + # The non-lazy keys used to construct the workflow + key_set = frozenset(["watermark_dims", "constructor"]) + assert list(workflow_cache) == [key_set] + assert len(workflow_cache[key_set]) == 1 + + # check to see that the cache is not used if we change the value of one of the + # non lazy fields + workflow_spec.watermark_dims = (20, 20) + wf2 = Workflow.construct(workflow_spec) + assert wf2 is not wf + assert list(workflow_cache) == [key_set] + assert len(workflow_cache[key_set]) == 2 + + # check to see that the cache is used if we provide a concrete value for one of the + # lazy fields + workflow_spec.input_video = video.Mp4.mock("input.mp4") + wf3 = Workflow.construct(workflow_spec) + assert wf3 is wf2 + assert list(workflow_cache) == [key_set] + assert len(workflow_cache[key_set]) == 2 + + +def test_direct_access_of_workflow_object(): + + @python.define(inputs={"x": float}, outputs={"z": float}) + def Add(x, y): + 
return x + y + + def Mul(x, y): + return x * y + + @workflow.define(outputs=["out1", "out2"]) + def MyTestWorkflow(a: int, b: float) -> tuple[float, float]: + """A test workflow demonstration a few alternative ways to set and connect nodes + + Args: + a: An integer input + b: A float input + + Returns: + out1: The first output + out2: The second output + """ + + wf = workflow.this() + + add = wf.add(Add(x=a, y=b), name="addition") + mul = wf.add(python.define(Mul, outputs={"out": float})(x=add.z, y=b)) + divide = wf.add(Divide(x=wf["addition"].lzout.z, y=mul.out), name="division") + + # Alter one of the inputs to a node after it has been initialised + wf["Mul"].inputs.y *= 2 + + return mul.out, divide.divided + + assert list_fields(MyTestWorkflow) == [ + workflow.arg(name="a", type=int, help="An integer input"), + workflow.arg(name="b", type=float, help="A float input"), + workflow.arg( + name="constructor", + type=ty.Callable, + hash_eq=True, + default=MyTestWorkflow().constructor, + ), + ] + assert list_fields(MyTestWorkflow.Outputs) == [ + workflow.out(name="out1", type=float, help="The first output"), + workflow.out(name="out2", type=float, help="The second output"), + ] + workflow_spec = MyTestWorkflow(a=1, b=2.0) + wf = Workflow.construct(workflow_spec) + assert wf.inputs.a == 1 + assert wf.inputs.b == 2.0 + assert wf.outputs.out1 == LazyOutField( + node=wf["Mul"], field="out", type=float, type_checked=True + ) + assert wf.outputs.out2 == LazyOutField( + node=wf["division"], field="divided", type=ty.Any + ) + assert list(wf.node_names) == ["addition", "Mul", "division"] + + +def test_workflow_set_outputs_directly(): + + @workflow.define(outputs={"out1": float, "out2": float}) + def MyTestWorkflow(a: int, b: float): + + wf = workflow.this() + add = wf.add(Add(a=a, b=b)) + wf.add(Mul(a=add.out, b=b)) + + # Set the outputs of the workflow directly instead of returning them them in + # a tuple + wf.outputs.out2 = add.out # Using the returned lzout outputs + wf.outputs.out1 = wf["Mul"].lzout.out # accessing the lzout outputs via getitem + + # no return is used when the outputs are set directly + + assert list_fields(MyTestWorkflow) == [ + workflow.arg(name="a", type=int), + workflow.arg(name="b", type=float), + workflow.arg( + name="constructor", + type=ty.Callable, + hash_eq=True, + default=MyTestWorkflow().constructor, + ), + ] + assert list_fields(MyTestWorkflow.Outputs) == [ + workflow.out(name="out1", type=float), + workflow.out(name="out2", type=float), + ] + workflow_spec = MyTestWorkflow(a=1, b=2.0) + wf = Workflow.construct(workflow_spec) + assert wf.inputs.a == 1 + assert wf.inputs.b == 2.0 + assert wf.outputs.out1 == LazyOutField(node=wf["Mul"], field="out", type=ty.Any) + assert wf.outputs.out2 == LazyOutField(node=wf["Add"], field="out", type=ty.Any) + assert list(wf.node_names) == ["Add", "Mul"] + + +def test_workflow_split_combine1(): + + @python.define + def Mul(x: float, y: float) -> float: + return x * y + + @python.define + def Sum(x: list[float]) -> float: + return sum(x) + + @workflow.define + def MyTestWorkflow(a: list[int], b: list[float]) -> list[float]: + mul = workflow.add(Mul().split(x=a, y=b).combine("x")) + sum = workflow.add(Sum(x=mul.out)) + return sum.out + + wf = Workflow.construct(MyTestWorkflow(a=[1, 2, 3], b=[1.0, 10.0, 100.0])) + assert wf["Mul"].splitter == ["Mul.x", "Mul.y"] + assert wf["Mul"].combiner == ["Mul.x"] + assert wf.outputs.out == LazyOutField( + node=wf["Sum"], field="out", type=list[float], type_checked=True + ) + + +def 
test_workflow_split_combine2(): + + @python.define + def Mul(x: float, y: float) -> float: + return x * y + + @python.define + def Add(x: float, y: float) -> float: + return x + y + + @workflow.define + def MyTestWorkflow(a: list[int], b: list[float], c: float) -> list[float]: + mul = workflow.add(Mul().split(x=a, y=b)) + add = workflow.add(Add(x=mul.out, y=c).combine("Mul.x")) + sum = workflow.add(Sum(x=add.out)) + return sum.out + + wf = Workflow.construct(MyTestWorkflow(a=[1, 2, 3], b=[1.0, 10.0, 100.0], c=2.0)) + assert wf["Mul"].splitter == ["Mul.x", "Mul.y"] + assert wf["Mul"].combiner == [] + assert wf["Add"].splitter == "_Mul" + assert wf["Add"].combiner == ["Mul.x"] + assert wf.outputs.out == LazyOutField( + node=wf["Sum"], field="out", type=list[float], type_checked=True + ) + + +def test_nested_workflow(): + """Simple test of a nested workflow""" + + @python.define + def Add(x: float, y: float) -> float: + return x + y + + @python.define + def Mul(x: float, y: float) -> float: + return x * y + + @python.define + def Divide(x: float, y: float) -> float: + return x / y + + @python.define + def Power(x: float, y: float) -> float: + return x**y + + @workflow.define + def NestedWorkflow(a: float, b: float, c: float) -> float: + pow = workflow.add(Power(x=a, y=c)) + add = workflow.add(Add(x=pow.out, y=b)) + return add.out + + @workflow.define + def MyTestWorkflow(a: int, b: float, c: float) -> float: + div = workflow.add(Divide(x=a, y=b)) + nested = workflow.add(NestedWorkflow(a=div.out, b=b, c=c)) + return nested.out + + wf = Workflow.construct(MyTestWorkflow(a=1, b=10.0, c=2.0)) + assert wf.inputs.a == 1 + assert wf.inputs.b == 10.0 + assert wf.inputs.c == 2.0 + assert wf.outputs.out == LazyOutField( + node=wf["NestedWorkflow"], field="out", type=float, type_checked=True + ) + assert list(wf.node_names) == ["Divide", "NestedWorkflow"] + nwf_spec = copy(wf["NestedWorkflow"]._definition) + nwf_spec.a = 100.0 + nwf = Workflow.construct(nwf_spec) + nwf.inputs.a == 100.0 + nwf.inputs.b == 10.0 + nwf.inputs.c == 2.0 + nwf.outputs.out == LazyOutField(node=nwf["Add"], field="out", type=float) + assert list(nwf.node_names) == ["Power", "Add"] + + +def test_recursively_nested_conditional_workflow(): + """More complex nested workflow example demonstrating conditional branching at run + time""" + + @python.define + def Add(x: float, y: float) -> float: + return x + y + + @python.define + def Subtract(x: float, y: float) -> float: + return x - y + + @workflow.define + def RecursiveNestedWorkflow(a: float, depth: int) -> float: + add = workflow.add(Add(x=a, y=1)) + decrement_depth = workflow.add(Subtract(x=depth, y=1)) + if depth > 0: + out_node = workflow.add( + RecursiveNestedWorkflow(a=add.out, depth=decrement_depth.out) + ) + else: + out_node = add + return out_node.out + + wf = Workflow.construct(RecursiveNestedWorkflow(a=1, depth=3)) + assert wf.inputs.a == 1 + assert wf.inputs.depth == 3 + assert wf.outputs.out == LazyOutField( + node=wf["RecursiveNestedWorkflow"], + field="out", + type=float, + type_checked=True, + ) diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py new file mode 100644 index 0000000000..c1be87298f --- /dev/null +++ b/pydra/design/workflow.py @@ -0,0 +1,268 @@ +import typing as ty +import inspect +from typing import dataclass_transform +import attrs +from pydra.design.base import ( + Arg, + Out, + ensure_field_objects, + make_task_def, + parse_doc_string, + extract_function_inputs_and_outputs, + check_explicit_fields_are_none, + extract_fields_from_class, 
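+    # (explanatory comment, not part of the original diff) these helpers from
+    # pydra.design.base are used by `define` below to build the WorkflowDef class,
+    # either from a decorated constructor function or from a dataclass-style class.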
+) + +if ty.TYPE_CHECKING: + from pydra.engine.core import Workflow + from pydra.engine.specs import TaskDef, TaskOutputs, WorkflowDef + from pydra.engine.environments import Environment + from pydra.engine.specs import TaskHooks + + +__all__ = ["define", "add", "this", "arg", "out"] + + +@attrs.define +class arg(Arg): + """Argument of a workflow task definition + + Parameters + ---------- + help: str + A short description of the input field. + default : Any, optional + the default value for the argument + allowed_values: list, optional + List of allowed values for the field. + requires: list, optional + Names of the inputs that are required together with the field. + copy_mode: File.CopyMode, optional + The mode of copying the file, by default it is File.CopyMode.any + copy_collation: File.CopyCollation, optional + The collation of the file, by default it is File.CopyCollation.any + copy_ext_decomp: File.ExtensionDecomposition, optional + The extension decomposition of the file, by default it is + File.ExtensionDecomposition.single + readonly: bool, optional + If True the input field can’t be provided by the user but it aggregates other + input fields (for example the fields with argstr: -o {fldA} {fldB}), by default + it is False + type: type, optional + The type of the field, by default it is Any + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + lazy: bool, optional + If True the input field is not required at construction time but is passed straight + through to the tasks, by default it is False + """ + + pass + + +@attrs.define +class out(Out): + """Output of a workflow task definition + + Parameters + ---------- + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + type: type, optional + The type of the field, by default it is Any + help: str, optional + A short description of the input field. + requires: list, optional + Names of the inputs that are required together with the field. + converter: callable, optional + The converter for the field passed through to the attrs.field, by default it is None + validator: callable | iterable[callable], optional + The validator(s) for the field passed through to the attrs.field, by default it is None + """ + + pass + + +@dataclass_transform( + kw_only_default=True, + field_specifiers=(out,), +) +def outputs(wrapped): + """Decorator to specify the output fields of a shell command is a dataclass-style type""" + return wrapped + + +@dataclass_transform( + kw_only_default=True, + field_specifiers=(arg,), +) +def define( + wrapped: type | ty.Callable | None = None, + /, + inputs: list[str | Arg] | dict[str, Arg | type] | None = None, + outputs: list[str | Out] | dict[str, Out | type] | type | None = None, + bases: ty.Sequence[type] = (), + outputs_bases: ty.Sequence[type] = (), + lazy: list[str] | None = None, + auto_attribs: bool = True, + xor: ty.Sequence[str | None] | ty.Sequence[ty.Sequence[str | None]] = (), +) -> "WorkflowDef": + """ + Create an interface for a function or a class. Can be used either as a decorator on + a constructor function or the "canonical" dataclass-form of a task definition. + + Parameters + ---------- + wrapped : type | callable | None + The function or class to create an interface for. + inputs : list[str | Arg] | dict[str, Arg | type] | None + The inputs to the function or class. 
+ outputs : list[str | Out] | dict[str, Out | type] | type | None + The outputs of the function or class. + auto_attribs : bool + Whether to use auto_attribs mode when creating the class. + xor: Sequence[str | None] | Sequence[Sequence[str | None]], optional + Names of args that are exclusive mutually exclusive, which must include + the name of the current field. If this list includes None, then none of the + fields need to be set. + + Returns + ------- + TaskDef + The interface for the function or class. + """ + from pydra.engine.specs import TaskDef, WorkflowDef, WorkflowOutputs + + if lazy is None: + lazy = [] + + def make(wrapped: ty.Callable | type) -> TaskDef: + if inspect.isclass(wrapped): + klass = wrapped + constructor = klass.constructor + name = klass.__name__ + check_explicit_fields_are_none(klass, inputs, outputs) + parsed_inputs, parsed_outputs = extract_fields_from_class( + WorkflowDef, WorkflowOutputs, klass, arg, out, auto_attribs + ) + else: + if not inspect.isfunction(wrapped): + raise ValueError( + f"wrapped must be a class or a function, not {wrapped!r}" + ) + klass = None + constructor = wrapped + input_helps, output_helps = parse_doc_string(constructor.__doc__) + inferred_inputs, inferred_outputs = extract_function_inputs_and_outputs( + constructor, arg, inputs, outputs + ) + name = constructor.__name__ + + parsed_inputs, parsed_outputs = ensure_field_objects( + arg_type=arg, + out_type=out, + inputs=inferred_inputs, + outputs=inferred_outputs, + input_helps=input_helps, + output_helps=output_helps, + ) + + parsed_inputs["constructor"] = arg( + name="constructor", type=ty.Callable, hash_eq=True, default=constructor + ) + for inpt_name in lazy: + parsed_inputs[inpt_name].lazy = True + + defn = make_task_def( + WorkflowDef, + WorkflowOutputs, + parsed_inputs, + parsed_outputs, + name=name, + klass=klass, + bases=bases, + outputs_bases=outputs_bases, + xor=xor, + ) + + return defn + + if wrapped is not None: + if not isinstance(wrapped, (ty.Callable, type)): + raise ValueError(f"wrapped must be a class or a callable, not {wrapped!r}") + return make(wrapped) + return make + + +def this() -> "Workflow": + """Get the workflow currently being constructed. + + Returns + ------- + Workflow + The workflow currently being constructed. + """ + from pydra.engine.core import Workflow + + return Workflow.under_construction() + + +OutputsType = ty.TypeVar("OutputsType", bound="TaskOutputs") + + +def add( + task_def: "TaskDef[OutputsType]", + name: str | None = None, + environment: "Environment | None" = None, + hooks: "TaskHooks | None" = None, +) -> OutputsType: + """Add a node to the workflow currently being constructed + + Parameters + ---------- + task_def : TaskDef + The definition of the task to add to the workflow as a node + name : str, optional + The name of the node, by default it will be the name of the task definition + class + environment : Environment, optional + The environment to run the task in, such as the Docker or Singularity container, + by default it will be the "native" + hooks : TaskHooks, optional + The hooks to run before or after the task, by default no hooks will be run + + Returns + ------- + Outputs + The outputs definition of the node + """ + return this().add(task_def, name=name, environment=environment, hooks=hooks) + + +U = ty.TypeVar("U") + + +def cast(field: ty.Any, new_type: type[U]) -> U: + """Cast a lazy field to a new type. 
Note that the typing in the signature is a white + lie, as the return field is actually a LazyField as placeholder for the object of + type U. + + Parameters + ---------- + field : LazyField[T] + The field to cast + new_type : type[U] + The new type to cast the field to + + Returns + ------- + LazyField[U] + A copy of the lazy field with the new type + """ + return attrs.evolve( + field, + type=new_type, + cast_from=field._cast_from if field._cast_from else field._type, + ) diff --git a/pydra/engine/__init__.py b/pydra/engine/__init__.py index 2eca36ba28..46cf85c08f 100644 --- a/pydra/engine/__init__.py +++ b/pydra/engine/__init__.py @@ -1,14 +1,26 @@ """The core of the workflow engine.""" -from .submitter import Submitter -from .core import Workflow -from .task import AuditFlag, ShellCommandTask -from . import specs +import __main__ +import logging +from ._version import __version__ __all__ = [ - "AuditFlag", - "ShellCommandTask", - "Submitter", - "Workflow", - "specs", + "logger", + "check_latest_version", ] + +logger = logging.getLogger("pydra") + + +def check_latest_version(): + import etelemetry + + return etelemetry.check_available_version("nipype/pydra", __version__, lgr=logger) + + +# Run telemetry on import for interactive sessions, such as IPython, Jupyter notebooks, Python REPL +# if not hasattr(__main__, "__file__"): +# from pydra.engine.core import TaskBase + +# if TaskBase._etelemetry_version_data is None: +# TaskBase._etelemetry_version_data = check_latest_version() diff --git a/pydra/engine/audit.py b/pydra/engine/audit.py index 7397fad6e6..1622565dab 100644 --- a/pydra/engine/audit.py +++ b/pydra/engine/audit.py @@ -1,19 +1,21 @@ """Module to keep track of provenance information.""" import os +import typing as ty import json -import attr -from ..utils.messenger import send_message, make_message, gen_uuid, now, AuditFlag -from ..utils.hash import hash_function -from .helpers import ensure_list, gather_runtime_info -from .specs import attr_fields +from pydra.utils.messenger import send_message, make_message, gen_uuid, now, AuditFlag +from pydra.engine.helpers import attrs_values from fileformats.core import FileSet +from pydra.utils.hash import hash_function try: import importlib_resources except ImportError: import importlib.resources as importlib_resources # type: ignore +if ty.TYPE_CHECKING: + from pydra.engine.task import Task + class Audit: """Handle provenance tracking and resource utilization.""" @@ -28,7 +30,7 @@ def __init__(self, audit_flags, messengers, messenger_args, develop=None): Base configuration of auditing. messengers : :obj:`pydra.util.messenger.Messenger` or list of :class:`pydra.util.messenger.Messenger`, optional - Specify types of messenger used by Audit to send a message. + Taskify types of messenger used by Audit to send a message. Could be `PrintMessenger`, `FileMessenger`, or `RemoteRESTMessenger`. messenger_args : :obj:`dict`, optional Optional arguments for the `Messenger.send` method. @@ -36,6 +38,8 @@ def __init__(self, audit_flags, messengers, messenger_args, develop=None): If True, the local context.jsonld file is used, otherwise the one from github is used. 
""" + from .helpers import ensure_list + self.audit_flags = audit_flags self.messengers = ensure_list(messengers) self.messenger_args = messenger_args @@ -70,7 +74,7 @@ def start_audit(self, odir): if self.audit_check(AuditFlag.PROV): self.audit_message(start_message, AuditFlag.PROV) if self.audit_check(AuditFlag.RESOURCE): - from ..utils.profiler import ResourceMonitor + from pydra.utils.profiler import ResourceMonitor self.resource_monitor = ResourceMonitor(os.getpid(), logdir=self.odir) @@ -93,6 +97,8 @@ def monitor(self): def finalize_audit(self, result): """End auditing.""" if self.audit_check(AuditFlag.RESOURCE): + from .helpers import gather_runtime_info + self.resource_monitor.stop() result.runtime = gather_runtime_info(self.resource_monitor.fname) if self.audit_check(AuditFlag.PROV): @@ -102,7 +108,7 @@ def finalize_audit(self, result): ) # audit resources/runtime information self.eid = f"uid:{gen_uuid()}" - entity = attr.asdict(result.runtime, recurse=False) + entity = attrs_values(result.runtime) entity.update( **{ "@id": self.eid, @@ -176,16 +182,19 @@ def audit_check(self, flag): """ return self.audit_flags & flag - def audit_task(self, task): + def audit_task(self, task: "Task"): import subprocess as sp + from .helpers import list_fields label = task.name - command = task.cmdline if hasattr(task.inputs, "executable") else None - attr_list = attr_fields(task.inputs) + command = ( + task.definition.cmdline if hasattr(task.definition, "executable") else None + ) + attr_list = list_fields(task.definition) for attrs in attr_list: input_name = attrs.name - value = getattr(task.inputs, input_name) + value = task.inputs[input_name] if isinstance(value, FileSet): input_path = os.path.abspath(value) file_hash = hash_function(value) diff --git a/pydra/engine/boutiques.py b/pydra/engine/boutiques.py deleted file mode 100644 index 0f3cf110e1..0000000000 --- a/pydra/engine/boutiques.py +++ /dev/null @@ -1,213 +0,0 @@ -import typing as ty -import json -import attr -from urllib.request import urlretrieve -from pathlib import Path -from functools import reduce - -from ..utils.messenger import AuditFlag -from ..engine import ShellCommandTask -from ..engine.specs import SpecInfo, ShellSpec, ShellOutSpec, File, attr_fields -from .helpers_file import is_local_file - - -class BoshTask(ShellCommandTask): - """Shell Command Task based on the Boutiques descriptor""" - - def __init__( - self, - zenodo_id=None, - bosh_file=None, - audit_flags: AuditFlag = AuditFlag.NONE, - cache_dir=None, - input_spec_names: ty.Optional[ty.List] = None, - messenger_args=None, - messengers=None, - name=None, - output_spec_names: ty.Optional[ty.List] = None, - rerun=False, - strip=False, - **kwargs, - ): - """ - Initialize this task. - - Parameters - ---------- - zenodo_id: :obj: str - Zenodo ID - bosh_file : : str - json file with the boutiques descriptors - audit_flags : :obj:`pydra.utils.messenger.AuditFlag` - Auditing configuration - cache_dir : :obj:`os.pathlike` - Cache directory - input_spec_names : :obj: list - Input names for input_spec. - messenger_args : - TODO - messengers : - TODO - name : :obj:`str` - Name of this task. - output_spec_names : :obj: list - Output names for output_spec. 
- strip : :obj:`bool` - TODO - - """ - self.cache_dir = cache_dir - if (bosh_file and zenodo_id) or not (bosh_file or zenodo_id): - raise Exception("either bosh or zenodo_id has to be specified") - elif zenodo_id: - self.bosh_file = self._download_spec(zenodo_id) - else: # bosh_file - self.bosh_file = bosh_file - - with self.bosh_file.open() as f: - self.bosh_spec = json.load(f) - - self.input_spec = self._prepare_input_spec(names_subset=input_spec_names) - self.output_spec = self._prepare_output_spec(names_subset=output_spec_names) - self.bindings = ["-v", f"{self.bosh_file.parent}:{self.bosh_file.parent}:ro"] - - super().__init__( - name=name, - input_spec=self.input_spec, - output_spec=self.output_spec, - executable=["bosh", "exec", "launch"], - args=["-s"], - audit_flags=audit_flags, - messengers=messengers, - messenger_args=messenger_args, - cache_dir=self.cache_dir, - strip=strip, - rerun=rerun, - **kwargs, - ) - self.strip = strip - - def _download_spec(self, zenodo_id): - """ - using boutiques Searcher to find url of zenodo file for a specific id, - and download the file to self.cache_dir - """ - from boutiques.searcher import Searcher - - searcher = Searcher(zenodo_id, exact_match=True) - hits = searcher.zenodo_search().json()["hits"]["hits"] - if len(hits) == 0: - raise Exception(f"can't find zenodo spec for {zenodo_id}") - elif len(hits) > 1: - raise Exception(f"too many hits for {zenodo_id}") - else: - zenodo_url = hits[0]["files"][0]["links"]["self"] - zenodo_file = self.cache_dir / f"zenodo.{zenodo_id}.json" - urlretrieve(zenodo_url, zenodo_file) - return zenodo_file - - def _prepare_input_spec(self, names_subset=None): - """creating input spec from the zenodo file - if name_subset provided, only names from the subset will be used in the spec - """ - binputs = self.bosh_spec["inputs"] - self._input_spec_keys = {} - fields = [] - for input in binputs: - name = input["id"] - if names_subset is None: - pass - elif name not in names_subset: - continue - else: - names_subset.remove(name) - if input["type"] == "File": - tp = File - elif input["type"] == "String": - tp = str - elif input["type"] == "Number": - tp = float - elif input["type"] == "Flag": - tp = bool - else: - tp = None - # adding list - if tp and "list" in input and input["list"]: - tp = ty.List[tp] - - mdata = { - "help_string": input.get("description", None) or input["name"], - "mandatory": not input["optional"], - "argstr": input.get("command-line-flag", None), - } - fields.append((name, tp, mdata)) - self._input_spec_keys[input["value-key"]] = "{" + f"{name}" + "}" - if names_subset: - raise RuntimeError(f"{names_subset} are not in the zenodo input spec") - spec = SpecInfo(name="Inputs", fields=fields, bases=(ShellSpec,)) - return spec - - def _prepare_output_spec(self, names_subset=None): - """creating output spec from the zenodo file - if name_subset provided, only names from the subset will be used in the spec - """ - boutputs = self.bosh_spec["output-files"] - fields = [] - for output in boutputs: - name = output["id"] - if names_subset is None: - pass - elif name not in names_subset: - continue - else: - names_subset.remove(name) - path_template = reduce( - lambda s, r: s.replace(*r), - self._input_spec_keys.items(), - output["path-template"], - ) - mdata = { - "help_string": output.get("description", None) or output["name"], - "mandatory": not output["optional"], - "output_file_template": path_template, - } - fields.append((name, attr.ib(type=File, metadata=mdata))) - - if names_subset: - raise 
RuntimeError(f"{names_subset} are not in the zenodo output spec") - spec = SpecInfo(name="Outputs", fields=fields, bases=(ShellOutSpec,)) - return spec - - def _command_args_single(self, state_ind=None, index=None): - """Get command line arguments for a single state""" - input_filepath = self._bosh_invocation_file(state_ind=state_ind, index=index) - cmd_list = ( - self.inputs.executable - + [str(self.bosh_file), input_filepath] - + self.inputs.args - + self.bindings - ) - return cmd_list - - def _bosh_invocation_file(self, state_ind=None, index=None): - """creating bosh invocation file - json file with inputs values""" - input_json = {} - for f in attr_fields(self.inputs, exclude_names=("executable", "args")): - if self.state and f"{self.name}.{f.name}" in state_ind: - value = getattr(self.inputs, f.name)[state_ind[f"{self.name}.{f.name}"]] - else: - value = getattr(self.inputs, f.name) - # adding to the json file if specified by the user - if value is not attr.NOTHING and value != "NOTHING": - if is_local_file(f): - value = Path(value) - self.bindings.extend(["-v", f"{value.parent}:{value.parent}:ro"]) - value = str(value) - - input_json[f.name] = value - - filename = self.cache_dir / f"{self.name}-{index}.json" - with open(filename, "w") as jsonfile: - json.dump(input_json, jsonfile) - - return str(filename) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index d0081e3ace..56752f071f 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -1,62 +1,63 @@ """Basic processing graph elements.""" -import abc import json import logging -import itertools -from functools import cached_property import os +import inspect import sys from pathlib import Path import typing as ty -from copy import deepcopy, copy from uuid import uuid4 -from filelock import SoftFileLock import shutil -from tempfile import mkdtemp from traceback import format_exception import attr import cloudpickle as cp -from . import state -from . 
import helpers_state as hlpst +from copy import copy +from collections import defaultdict +from typing import Self +import attrs +from filelock import SoftFileLock +from pydra.engine.specs import TaskDef, WorkflowDef, TaskOutputs, WorkflowOutputs +from pydra.engine.graph import DiGraph +from pydra.engine import state +from .lazy import LazyInField, LazyOutField +from pydra.utils.hash import hash_function, Cache +from pydra.engine.state import State +from .node import Node +from datetime import datetime +from fileformats.core import FileSet from .specs import ( - File, - BaseSpec, RuntimeSpec, Result, - SpecInfo, - LazyIn, - LazyOut, - LazyField, - TaskHook, - attr_fields, - StateArray, + TaskHooks, ) from .helpers import ( - make_klass, - create_checksum, - print_help, + attrs_fields, + attrs_values, load_result, save, - ensure_list, record_error, PydraFileLock, - parse_copyfile, + list_fields, + is_lazy, + ensure_list, ) -from ..utils.hash import hash_function from .helpers_file import copy_nested_files, template_update -from .graph import DiGraph -from .audit import Audit -from ..utils.messenger import AuditFlag -from ..utils.typing import TypeParser -from fileformats.core import FileSet +from pydra.utils.messenger import AuditFlag +from pydra.engine.environments import Environment logger = logging.getLogger("pydra") develop = False +if ty.TYPE_CHECKING: + from pydra.engine.submitter import Submitter, NodeExecution + from pydra.design.base import Arg -class TaskBase: +DefType = ty.TypeVar("DefType", bound=TaskDef) + + +class Task(ty.Generic[DefType]): """ A base structure for the nodes in the processing graph. @@ -84,17 +85,23 @@ class TaskBase: _cache_dir = None # Working directory in which to operate _references = None # List of references for a task + name: str + definition: DefType + submitter: "Submitter | None" + environment: "Environment | None" + state_index: int + bindings: dict[str, ty.Any] | None = None # Bindings for the task environment + + _inputs: dict[str, ty.Any] | None = None + def __init__( self, + definition: DefType, + submitter: "Submitter", name: str, - audit_flags: AuditFlag = AuditFlag.NONE, - cache_dir=None, - cache_locations=None, - inputs: ty.Optional[ty.Union[ty.Text, File, ty.Dict]] = None, - cont_dim=None, - messenger_args=None, - messengers=None, - rerun=False, + environment: "Environment | None" = None, + state_index: int | None = None, + hooks: TaskHooks | None = None, ): """ Initialize a task. @@ -111,141 +118,81 @@ def __init__( b. Gets killed -> restart 3. No cache or other process -> start 4. Two or more concurrent new processes get to start - - Parameters - ---------- - name : :obj:`str` - Unique name of this node - audit_flags : :class:`AuditFlag`, optional - Configure provenance tracking. Default is no provenance tracking. - See available flags at :class:`~pydra.utils.messenger.AuditFlag`. - cache_dir : :obj:`os.pathlike` - Set a custom directory of previously computed nodes. - cache_locations : - TODO - inputs : :obj:`typing.Text`, or :class:`File`, or :obj:`dict`, or `None`. - Set particular inputs to this node. - cont_dim : :obj:`dict`, or `None` - Container dimensions for input fields, - if any of the container should be treated as a container - messenger_args : - TODO - messengers : - TODO """ - from .. 
import check_latest_version - - if TaskBase._etelemetry_version_data is None: - TaskBase._etelemetry_version_data = check_latest_version() - # raise error if name is same as of attributes - if name in dir(self): - raise ValueError("Cannot use names of attributes or methods as task name") - self.name = name - if not self.input_spec: - raise Exception("No input_spec in class: %s" % self.__class__.__name__) - klass = make_klass(self.input_spec) - - self.inputs = klass( - **{ - # in attrs names that starts with "_" could be set when name provided w/o "_" - (f.name[1:] if f.name.startswith("_") else f.name): f.default - for f in attr.fields(klass) - } + if not isinstance(definition, TaskDef): + raise ValueError( + f"Task definition ({definition!r}) must be a TaskDef, not {type(definition)}" + ) + # Check that the definition is fully resolved and ready to run + definition._check_resolved() + definition._check_rules() + self.definition = definition + # We save the submitter is the definition is a workflow otherwise we don't + # so the task can be pickled + self.submitter = submitter + self.environment = ( + environment if environment is not None else submitter.environment ) + self.name = name + self.state_index = state_index - self.input_names = [ - field.name - for field in attr.fields(klass) - if field.name not in ["_func", "_graph_checksums"] - ] - - if inputs: - if isinstance(inputs, dict): - # selecting items that are in input_names (ignoring fields that are not in input_spec) - inputs = {k: v for k, v in inputs.items() if k in self.input_names} - # TODO: this needs to finished and tested after #305 - elif Path(inputs).is_file(): - inputs = json.loads(Path(inputs).read_text()) - # TODO: this needs to finished and tested after #305 - elif isinstance(inputs, str): - if self._input_sets is None or inputs not in self._input_sets: - raise ValueError(f"Unknown input set {inputs!r}") - inputs = self._input_sets[inputs] - - self.inputs = attr.evolve(self.inputs, **inputs) - - # checking if metadata is set properly - self.inputs.check_metadata() - # dictionary to save the connections with lazy fields - self.inp_lf = {} - self.state = None - # container dimensions provided by the user - self.cont_dim = cont_dim - # container dimension for inner input if needed (e.g. 
for inner splitter) - self._inner_cont_dim = {} - self._output = {} + self.return_values = {} self._result = {} # flag that says if node finished all jobs self._done = False if self._input_sets is None: self._input_sets = {} - self.audit = Audit( - audit_flags=audit_flags, - messengers=messengers, - messenger_args=messenger_args, - develop=develop, - ) - self.cache_dir = cache_dir - self.cache_locations = cache_locations self.allow_cache_override = True self._checksum = None self._uid = uuid4().hex - # if True the results are not checked (does not propagate to nodes) - self.task_rerun = rerun - - self.plugin = None - self.hooks = TaskHook() + self.hooks = hooks if hooks is not None else TaskHooks() self._errored = False self._lzout = None + # Save the submitter attributes needed to run the task later + self.audit = submitter.audit + self.cache_dir = submitter.cache_dir + self.cache_locations = submitter.cache_locations + + @property + def cache_dir(self): + return self._cache_dir + + @property + def is_async(self) -> bool: + """Check to see if the task should be run asynchronously.""" + return self.submitter.worker.is_async and is_workflow(self.definition) + + @cache_dir.setter + def cache_dir(self, path: os.PathLike): + self._cache_dir = Path(path) + + @property + def cache_locations(self): + """Get the list of cache sources.""" + return self._cache_locations + ensure_list(self.cache_dir) + + @cache_locations.setter + def cache_locations(self, locations): + if locations is not None: + self._cache_locations = [Path(loc) for loc in ensure_list(locations)] + else: + self._cache_locations = [] + def __str__(self): return self.name def __getstate__(self): state = self.__dict__.copy() - state["input_spec"] = cp.dumps(state["input_spec"]) - state["output_spec"] = cp.dumps(state["output_spec"]) - inputs = {} - for k, v in attr.asdict(state["inputs"], recurse=False).items(): - if k.startswith("_"): - k = k[1:] - inputs[k] = v - state["inputs"] = inputs + state["definition"] = cp.dumps(state["definition"]) return state def __setstate__(self, state): - state["input_spec"] = cp.loads(state["input_spec"]) - state["output_spec"] = cp.loads(state["output_spec"]) - state["inputs"] = make_klass(state["input_spec"])(**state["inputs"]) + state["definition"] = cp.loads(state["definition"]) self.__dict__.update(state) - @cached_property - def lzout(self): - return LazyOut(self) - - def help(self, returnhelp=False): - """Print class help.""" - help_obj = print_help(self) - if returnhelp: - return help_obj - - @property - def version(self): - """Get version of this task structure.""" - return self._version - @property def errored(self): """Check if the task has raised an error""" @@ -258,62 +205,14 @@ def checksum(self): and to create nodes checksums needed for graph checksums (before the tasks have inputs etc.) """ - input_hash = self.inputs.hash - if self.state is None: - self._checksum = create_checksum(self.__class__.__name__, input_hash) - else: - splitter_hash = hash_function(self.state.splitter) - self._checksum = create_checksum( - self.__class__.__name__, hash_function([input_hash, splitter_hash]) - ) + if self._checksum is not None: + return self._checksum + self._checksum = self.definition._checksum return self._checksum - def checksum_states(self, state_index=None): - """ - Calculate a checksum for the specific state or all of the states of the task. - Replaces lists in the inputs fields with a specific values for states. 
- Used to recreate names of the task directories, - - Parameters - ---------- - state_index : - TODO - - """ - if is_workflow(self) and self.inputs._graph_checksums is attr.NOTHING: - self.inputs._graph_checksums = { - nd.name: nd.checksum for nd in self.graph_sorted - } - - if state_index is not None: - inputs_copy = copy(self.inputs) - for key, ind in self.state.inputs_ind[state_index].items(): - val = self._extract_input_el( - inputs=self.inputs, inp_nm=key.split(".")[1], ind=ind - ) - setattr(inputs_copy, key.split(".")[1], val) - # setting files_hash again in case it was cleaned by setting specific element - # that might be important for outer splitter of input variable with big files - # the file can be changed with every single index even if there are only two files - input_hash = inputs_copy.hash - if is_workflow(self): - con_hash = hash_function(self._connections) - # TODO: hash list is not used - hash_list = [input_hash, con_hash] # noqa: F841 - checksum_ind = create_checksum( - self.__class__.__name__, self._checksum_wf(input_hash) - ) - else: - checksum_ind = create_checksum(self.__class__.__name__, input_hash) - return checksum_ind - else: - checksum_list = [] - if not hasattr(self.state, "inputs_ind"): - self.state.prepare_states(self.inputs, cont_dim=self.cont_dim) - self.state.prepare_inputs() - for ind in range(len(self.state.inputs_ind)): - checksum_list.append(self.checksum_states(state_index=ind)) - return checksum_list + @property + def lockfile(self): + return self.output_dir.with_suffix(".lock") @property def uid(self): @@ -323,148 +222,25 @@ def uid(self): """ return self._uid - def set_state(self, splitter, combiner=None): - """ - Set a particular state on this task. - - Parameters - ---------- - splitter : - TODO - combiner : - TODO - - """ - if splitter is not None: - self.state = state.State( - name=self.name, splitter=splitter, combiner=combiner - ) - else: - self.state = None - return self.state - @property def output_names(self): - """Get the names of the outputs from the task's output_spec - (not everything has to be generated, see generated_output_names). - """ - return [f.name for f in attr.fields(make_klass(self.output_spec))] - - @property - def generated_output_names(self): - """Get the names of the outputs generated by the task. - If the spec doesn't have generated_output_names method, - it uses output_names. 
- The results depends on the input provided to the task - """ - output_klass = make_klass(self.output_spec) - if hasattr(output_klass, "generated_output_names"): - output = output_klass( - **{f.name: attr.NOTHING for f in attr.fields(output_klass)} - ) - # using updated input (after filing the templates) - _inputs = deepcopy(self.inputs) - modified_inputs = template_update(_inputs, self.output_dir) - if modified_inputs: - _inputs = attr.evolve(_inputs, **modified_inputs) - - return output.generated_output_names( - inputs=_inputs, output_dir=self.output_dir - ) - else: - return self.output_names + """Get the names of the outputs from the task's output_spec""" + return [f.name for f in attr.fields(self.definition.Outputs)] @property def can_resume(self): """Whether the task accepts checkpoint-restart.""" return self._can_resume - @abc.abstractmethod - def _run_task(self, environment=None): - pass - - @property - def cache_dir(self): - """Get the location of the cache directory.""" - return self._cache_dir - - @cache_dir.setter - def cache_dir(self, location): - if location is not None: - self._cache_dir = Path(location).resolve() - self._cache_dir.mkdir(parents=False, exist_ok=True) - else: - self._cache_dir = mkdtemp() - self._cache_dir = Path(self._cache_dir).resolve() - - @property - def cache_locations(self): - """Get the list of cache sources.""" - return self._cache_locations + ensure_list(self._cache_dir) - - @cache_locations.setter - def cache_locations(self, locations): - if locations is not None: - self._cache_locations = [Path(loc) for loc in ensure_list(locations)] - else: - self._cache_locations = [] - @property def output_dir(self): """Get the filesystem path where outputs will be written.""" - if self.state: - return [self._cache_dir / checksum for checksum in self.checksum_states()] - return self._cache_dir / self.checksum + return self.cache_dir / self.checksum @property - def cont_dim(self): - # adding inner_cont_dim to the general container_dimension provided by the users - cont_dim_all = deepcopy(self._cont_dim) - for k, v in self._inner_cont_dim.items(): - cont_dim_all[k] = cont_dim_all.get(k, 1) + v - return cont_dim_all - - @cont_dim.setter - def cont_dim(self, cont_dim): - if cont_dim is None: - self._cont_dim = {} - else: - self._cont_dim = cont_dim + def inputs(self) -> dict[str, ty.Any]: + """Resolve any template inputs of the task ahead of its execution: - def __call__( - self, - submitter=None, - plugin=None, - plugin_kwargs=None, - rerun=False, - environment=None, - **kwargs, - ): - """Make tasks callable themselves.""" - from .submitter import Submitter - - if submitter and plugin: - raise Exception("Specify submitter OR plugin, not both") - elif submitter: - pass - # if there is plugin provided or the task is a Workflow or has a state, - # the submitter will be created using provided plugin, self.plugin or "cf" - elif plugin or self.state or is_workflow(self): - plugin = plugin or self.plugin or "cf" - if plugin_kwargs is None: - plugin_kwargs = {} - submitter = Submitter(plugin=plugin, **plugin_kwargs) - - if submitter: - with submitter as sub: - self.inputs = attr.evolve(self.inputs, **kwargs) - res = sub(self, environment=environment) - else: # tasks without state could be run without a submitter - res = self._run(rerun=rerun, environment=environment, **kwargs) - return res - - def _modify_inputs(self): - """This method modifies the inputs of the task ahead of its execution: - links/copies upstream files and directories into the destination tasks working 
directory as required select state array values corresponding to state index (it will try to leave them where they are unless specified or @@ -474,48 +250,39 @@ def _modify_inputs(self): execution (they will be replaced after the task's execution with the original inputs to ensure the tasks checksums are consistent) """ - orig_inputs = { + if self._inputs is not None: + return self._inputs + + from pydra.utils.typing import TypeParser + + self._inputs = { k: v - for k, v in attr.asdict(self.inputs, recurse=False).items() + for k, v in attrs_values(self.definition).items() if not k.startswith("_") } map_copyfiles = {} - input_fields = attr.fields(type(self.inputs)) - for name, value in orig_inputs.items(): - fld = getattr(input_fields, name) - copy_mode, copy_collation = parse_copyfile( - fld, default_collation=self.DEFAULT_COPY_COLLATION - ) - if value is not attr.NOTHING and TypeParser.contains_type( - FileSet, fld.type - ): + fld: "Arg" + for fld in list_fields(self.definition): + name = fld.name + value = self._inputs[name] + if value and TypeParser.contains_type(FileSet, fld.type): copied_value = copy_nested_files( value=value, dest_dir=self.output_dir, - mode=copy_mode, - collation=copy_collation, + mode=fld.copy_mode, + collation=fld.copy_collation, supported_modes=self.SUPPORTED_COPY_MODES, ) if value is not copied_value: map_copyfiles[name] = copied_value - modified_inputs = template_update( - self.inputs, self.output_dir, map_copyfiles=map_copyfiles - ) - assert all(m in orig_inputs for m in modified_inputs), ( - "Modified inputs contain fields not present in original inputs. " - "This is likely a bug." + self._inputs.update( + template_update( + self.definition, output_dir=self.output_dir, map_copyfiles=map_copyfiles + ) ) - for name, orig_value in orig_inputs.items(): - try: - value = modified_inputs[name] - except KeyError: - # Ensure we pass a copy not the original just in case inner - # attributes are modified during execution - value = deepcopy(orig_value) - setattr(self.inputs, name, value) - return orig_inputs - - def _populate_filesystem(self, checksum, output_dir): + return self._inputs + + def _populate_filesystem(self): """ Invoked immediately after the lockfile is generated, this function: - Creates the cache file @@ -527,54 +294,66 @@ def _populate_filesystem(self, checksum, output_dir): # adding info file with the checksum in case the task was cancelled # and the lockfile has to be removed with open(self.cache_dir / f"{self.uid}_info.json", "w") as jsonfile: - json.dump({"checksum": checksum}, jsonfile) - if not self.can_resume and output_dir.exists(): - shutil.rmtree(output_dir) - output_dir.mkdir(parents=False, exist_ok=self.can_resume) + json.dump({"checksum": self.checksum}, jsonfile) + if not self.can_resume and self.output_dir.exists(): + shutil.rmtree(self.output_dir) + self.output_dir.mkdir(parents=False, exist_ok=self.can_resume) + # Save task pkl into the output directory for future reference + save(self.output_dir, task=self) - def _run(self, rerun=False, environment=None, **kwargs): - self.inputs = attr.evolve(self.inputs, **kwargs) - self.inputs.check_fields_input_spec() + def run(self, rerun: bool = False): + """Prepare the task working directory, execute the task definition, and save the + results. 
- checksum = self.checksum - output_dir = self.output_dir - lockfile = self.cache_dir / (checksum + ".lock") - # Eagerly retrieve cached - see scenarios in __init__() + Parameters + ---------- + rerun : bool + If True, the task will be re-run even if a result already exists. Will + propagated to all tasks within workflow tasks. + """ + # TODO: After these changes have been merged, will refactor this function and + # run_async to use common helper methods for pre/post run tasks + + # checking if the definition is fully resolved and ready to run self.hooks.pre_run(self) - logger.debug("'%s' is attempting to acquire lock on %s", self.name, lockfile) - with SoftFileLock(lockfile): - if not (rerun or self.task_rerun): + logger.debug( + "'%s' is attempting to acquire lock on %s", self.name, self.lockfile + ) + with SoftFileLock(self.lockfile): + if not (rerun): result = self.result() if result is not None and not result.errored: return result cwd = os.getcwd() - self._populate_filesystem(checksum, output_dir) - os.chdir(output_dir) - orig_inputs = self._modify_inputs() - result = Result(output=None, runtime=None, errored=False) + self._populate_filesystem() + os.chdir(self.output_dir) + result = Result( + outputs=None, + runtime=None, + errored=False, + output_dir=self.output_dir, + definition=self.definition, + ) self.hooks.pre_run_task(self) - self.audit.start_audit(odir=output_dir) + self.audit.start_audit(odir=self.output_dir) if self.audit.audit_check(AuditFlag.PROV): self.audit.audit_task(task=self) try: self.audit.monitor() - self._run_task(environment=environment) - result.output = self._collect_outputs(output_dir=output_dir) + self.definition._run(self, rerun) + result.outputs = self.definition.Outputs._from_task(self) except Exception: etype, eval, etr = sys.exc_info() traceback = format_exception(etype, eval, etr) - record_error(output_dir, error=traceback) + record_error(self.output_dir, error=traceback) result.errored = True raise finally: self.hooks.post_run_task(self, result) - self.audit.finalize_audit(result) - save(output_dir, result=result, task=self) + self.audit.finalize_audit(result=result) + save(self.output_dir, result=result, task=self) # removing the additional file with the checksum (self.cache_dir / f"{self.uid}_info.json").unlink() - # Restore original values to inputs - for field_name, field_value in orig_inputs.items(): - setattr(self.inputs, field_name, field_value) os.chdir(cwd) self.hooks.post_run(self, result) # Check for any changes to the input hashes that have occurred during the execution @@ -582,186 +361,60 @@ def _run(self, rerun=False, environment=None, **kwargs): self._check_for_hash_changes() return result - def _collect_outputs(self, output_dir): - output_klass = make_klass(self.output_spec) - output = output_klass( - **{f.name: attr.NOTHING for f in attr.fields(output_klass)} - ) - other_output = output.collect_additional_outputs( - self.inputs, output_dir, self.output_ - ) - return attr.evolve(output, **self.output_, **other_output) - - def split( - self, - splitter: ty.Union[str, ty.List[str], ty.Tuple[str, ...], None] = None, - overwrite: bool = False, - cont_dim: ty.Optional[dict] = None, - **inputs, - ): - """ - Run this task parametrically over lists of split inputs. + async def run_async(self, rerun: bool = False) -> Result: + """Prepare the task working directory, execute the task definition asynchronously, + and save the results. NB: only workflows are run asynchronously at the moment. 
Parameters ---------- - splitter : str or list[str] or tuple[str] or None - the fields which to split over. If splitting over multiple fields, lists of - fields are interpreted as outer-products and tuples inner-products. If None, - then the fields to split are taken from the keyword-arg names. - overwrite : bool, optional - whether to overwrite an existing split on the node, by default False - cont_dim : dict, optional - Container dimensions for specific inputs, used in the splitter. - If input name is not in cont_dim, it is assumed that the input values has - a container dimension of 1, so only the most outer dim will be used for splitting. - **split_inputs - fields to split over, will automatically be wrapped in a StateArray object - and passed to the node inputs - - Returns - ------- - self : TaskBase - a reference to the task + rerun : bool + If True, the task will be re-run even if a result already exists. Will + propagated to all tasks within workflow tasks. """ - if self._lzout: - raise RuntimeError( - f"Cannot split {self} as its output interface has already been accessed" - ) - if splitter is None and inputs: - splitter = list(inputs) - elif splitter: - missing = set(hlpst.unwrap_splitter(splitter)) - set(inputs) - missing = [m for m in missing if not m.startswith("_")] - if missing: - raise ValueError( - f"Split is missing values for the following fields {list(missing)}" - ) - splitter = hlpst.add_name_splitter(splitter, self.name) - # if user want to update the splitter, overwrite has to be True - if self.state and not overwrite and self.state.splitter != splitter: - raise Exception( - "splitter has been already set, " - "if you want to overwrite it - use overwrite=True" - ) - if cont_dim: - for key, vel in cont_dim.items(): - self._cont_dim[f"{self.name}.{key}"] = vel - if inputs: - new_inputs = {} - split_inputs = set( - f"{self.name}.{n}" if "." not in n else n - for n in hlpst.unwrap_splitter(splitter) - if not n.startswith("_") - ) - for inpt_name, inpt_val in inputs.items(): - new_val: ty.Any - if f"{self.name}.{inpt_name}" in split_inputs: # type: ignore - if isinstance(inpt_val, LazyField): - new_val = inpt_val.split(splitter) - elif isinstance(inpt_val, ty.Iterable) and not isinstance( - inpt_val, (ty.Mapping, str) - ): - new_val = StateArray(inpt_val) - else: - raise TypeError( - f"Could not split {inpt_val} as it is not a sequence type" - ) - else: - new_val = inpt_val - new_inputs[inpt_name] = new_val - self.inputs = attr.evolve(self.inputs, **new_inputs) - if not self.state or splitter != self.state.splitter: - self.set_state(splitter) - return self - - def combine( - self, - combiner: ty.Union[ty.List[str], str], - overwrite: bool = False, # **kwargs - ): - """ - Combine inputs parameterized by one or more previous tasks. 
- - Parameters - ---------- - combiner : list[str] or str - the - overwrite : bool - whether to overwrite an existing combiner on the node - **kwargs : dict[str, Any] - values for the task that will be "combined" before they are provided to the - node - - Returns - ------- - self : TaskBase - a reference to the task - """ - if self._lzout: - raise RuntimeError( - f"Cannot combine {self} as its output interface has already been " - "accessed" - ) - if not isinstance(combiner, (str, list)): - raise Exception("combiner has to be a string or a list") - combiner = hlpst.add_name_combiner(ensure_list(combiner), self.name) - if ( - self.state - and self.state.combiner - and combiner != self.state.combiner - and not overwrite - ): - raise Exception( - "combiner has been already set, " - "if you want to overwrite it - use overwrite=True" + # checking if the definition is fully resolved and ready to run + self.hooks.pre_run(self) + logger.debug( + "'%s' is attempting to acquire lock on %s", self.name, self.lockfile + ) + async with PydraFileLock(self.lockfile): + if not rerun: + result = self.result() + if result is not None and not result.errored: + return result + cwd = os.getcwd() + self._populate_filesystem() + result = Result( + outputs=None, + runtime=None, + errored=False, + output_dir=self.output_dir, + definition=self.definition, ) - if not self.state: - self.split(splitter=None) - # a task can have a combiner without a splitter - # if is connected to one with a splitter; - # self.fut_combiner will be used later as a combiner - self.fut_combiner = combiner - else: # self.state and not self.state.combiner - self.combiner = combiner - self.set_state(splitter=self.state.splitter, combiner=self.combiner) - return self - - def _extract_input_el(self, inputs, inp_nm, ind): - """ - Extracting element of the inputs taking into account - container dimension of the specific element that can be set in self.cont_dim. - If input name is not in cont_dim, it is assumed that the input values has - a container dimension of 1, so only the most outer dim will be used for splitting. 
- If - """ - if f"{self.name}.{inp_nm}" in self.cont_dim: - return list( - hlpst.flatten( - ensure_list(getattr(inputs, inp_nm)), - max_depth=self.cont_dim[f"{self.name}.{inp_nm}"], - ) - )[ind] - else: - return getattr(inputs, inp_nm)[ind] - - def get_input_el(self, ind): - """Collect all inputs required to run the node (for specific state element).""" - # TODO: doesn't work properly for more cmplicated wf (check if still an issue) - input_ind = self.state.inputs_ind[ind] - inputs_dict = {} - for inp in set(self.input_names): - if f"{self.name}.{inp}" in input_ind: - inputs_dict[inp] = self._extract_input_el( - inputs=self.inputs, - inp_nm=inp, - ind=input_ind[f"{self.name}.{inp}"], - ) - return inputs_dict - # else: - # # todo it never gets here - # breakpoint() - # inputs_dict = {inp: getattr(self.inputs, inp) for inp in self.input_names} - # return None, inputs_dict + self.hooks.pre_run_task(self) + self.audit.start_audit(odir=self.output_dir) + try: + self.audit.monitor() + await self.definition._run_async(self, rerun) + result.outputs = self.definition.Outputs._from_task(self) + except Exception: + etype, eval, etr = sys.exc_info() + traceback = format_exception(etype, eval, etr) + record_error(self.output_dir, error=traceback) + result.errored = True + self._errored = True + raise + finally: + self.hooks.post_run_task(self, result) + self.audit.finalize_audit(result=result) + save(self.output_dir, result=result, task=self) + # removing the additional file with the checksum + (self.cache_dir / f"{self.uid}_info.json").unlink() + os.chdir(cwd) + self.hooks.post_run(self, result) + # Check for any changes to the input hashes that have occurred during the execution + # of the task + self._check_for_hash_changes() + return result def pickle_task(self): """Pickling the tasks with full inputs""" @@ -775,38 +428,24 @@ def pickle_task(self): def done(self): """Check whether the tasks has been finalized and all outputs are stored.""" # if any of the field is lazy, there is no need to check results - if is_lazy(self.inputs): + if has_lazy(self.definition): return False _result = self.result() - if self.state: - # TODO: only check for needed state result - if _result and all(_result): - if self.state.combiner and isinstance(_result[0], list): - for res_l in _result: - if any([res.errored for res in res_l]): - raise ValueError(f"Task {self.name} raised an error") - return True - else: - if any([res.errored for res in _result]): - raise ValueError(f"Task {self.name} raised an error") - return True - # checking if self.result() is not an empty list only because - # the states_ind is an empty list (input field might be an empty list) - elif ( - _result == [] - and hasattr(self.state, "states_ind") - and self.state.states_ind == [] - ): + if _result: + if _result.errored: + self._errored = True + raise ValueError(f"Task {self.name!r} raised an error") + else: return True - else: - if _result: - if _result.errored: - self._errored = True - raise ValueError(f"Task {self.name} raised an error") - else: - return True return False + @property + def run_start_time(self) -> datetime | None: + """Check whether the task is currently running.""" + if not self.lockfile.exists(): + return None + return datetime.fromtimestamp(self.lockfile.stat().st_ctime) + def _combined_output(self, return_inputs=False): combined_results = [] for gr, ind_l in self.state.final_combined_ind_mapping.items(): @@ -817,7 +456,7 @@ def _combined_output(self, return_inputs=False): return None if return_inputs is True or return_inputs 
== "val": result = (self.state.states_val[ind], result) - elif return_inputs == "ind": + elif return_inputs is True or return_inputs == "ind": result = (self.state.states_ind[ind], result) combined_results_gr.append(result) combined_results.append(combined_results_gr) @@ -827,7 +466,7 @@ def _combined_output(self, return_inputs=False): else: return combined_results - def result(self, state_index=None, return_inputs=False): + def result(self, return_inputs=False): """ Retrieve the outcomes of this particular task. @@ -844,79 +483,40 @@ def result(self, state_index=None, return_inputs=False): result : Result the result of the task """ - # TODO: check if result is available in load_result and - # return a future if not if self.errored: - return Result(output=None, runtime=None, errored=True) - if self.state: - if state_index is None: - # if state_index=None, collecting all results - if self.state.combiner: - return self._combined_output(return_inputs=return_inputs) - else: - results = [] - for ind in range(len(self.state.inputs_ind)): - checksum = self.checksum_states(state_index=ind) - result = load_result(checksum, self.cache_locations) - if result is None: - return None - results.append(result) - if return_inputs is True or return_inputs == "val": - return list(zip(self.state.states_val, results)) - elif return_inputs == "ind": - return list(zip(self.state.states_ind, results)) - else: - return results - else: # state_index is not None - if self.state.combiner: - return self._combined_output(return_inputs=return_inputs)[ - state_index - ] - result = load_result( - self.checksum_states(state_index), self.cache_locations - ) - if return_inputs is True or return_inputs == "val": - return (self.state.states_val[state_index], result) - elif return_inputs == "ind": - return (self.state.states_ind[state_index], result) - else: - return result - else: - if state_index is not None: - raise ValueError("Task does not have a state") - checksum = self.checksum - result = load_result(checksum, self.cache_locations) - if result and result.errored: - self._errored = True - if return_inputs is True or return_inputs == "val": - inputs_val = { - f"{self.name}.{inp}": getattr(self.inputs, inp) - for inp in self.input_names - } - return (inputs_val, result) - elif return_inputs == "ind": - inputs_ind = {f"{self.name}.{inp}": None for inp in self.input_names} - return (inputs_ind, result) - else: - return result + return Result( + outputs=None, + runtime=None, + errored=True, + output_dir=self.output_dir, + definition=self.definition, + ) - def _reset(self): - """Reset the connections between inputs and LazyFields.""" - for field in attr_fields(self.inputs): - if field.name in self.inp_lf: - setattr(self.inputs, field.name, self.inp_lf[field.name]) - if is_workflow(self): - for task in self.graph.nodes: - task._reset() + checksum = self.checksum + result = load_result(checksum, self.cache_locations) + if result and result.errored: + self._errored = True + if return_inputs is True or return_inputs == "val": + inputs_val = { + f"{self.name}.{inp}": getattr(self.definition, inp) + for inp in self.input_names + } + return (inputs_val, result) + elif return_inputs == "ind": + inputs_ind = {f"{self.name}.{inp}": None for inp in self.input_names} + return (inputs_ind, result) + else: + return result def _check_for_hash_changes(self): - hash_changes = self.inputs.hash_changes() + hash_changes = self.definition._hash_changes() details = "" for changed in hash_changes: - field = getattr(attr.fields(type(self.inputs)), 
changed) - val = getattr(self.inputs, changed) + field = getattr(attr.fields(type(self.definition)), changed) + hash_function(getattr(self.definition, changed)) + val = getattr(self.definition, changed) field_type = type(val) - if issubclass(field.type, FileSet): + if inspect.isclass(field.type) and issubclass(field.type, FileSet): details += ( f"- {changed}: value passed to the {field.type} field is of type " f"{field_type} ('{val}'). If it is intended to contain output data " @@ -932,7 +532,7 @@ def _check_for_hash_changes(self): f"- {changed}: the {field_type} object passed to the {field.type}" f"field appears to have an unstable hash. This could be due to " "a stochastic/non-thread-safe attribute(s) of the object\n\n" - f"The {field.type}.__bytes_repr__() method can be implemented to " + f'A "bytes_repr" method for {field.type!r} can be implemented to ' "bespoke hashing methods based only on the stable attributes for " f"the `{field_type.__module__}.{field_type.__name__}` type. " f"See pydra/utils/hash.py for examples. Value: {val}\n" @@ -940,608 +540,364 @@ def _check_for_hash_changes(self): if hash_changes: raise RuntimeError( f"Input field hashes have changed during the execution of the " - f"'{self.name}' {type(self).__name__}.\n\n{details}" + f"'{self.name}' task of {type(self)} type.\n\n{details}" ) logger.debug( "Input values and hashes for '%s' %s node:\n%s\n%s", self.name, type(self).__name__, - self.inputs, - self.inputs._hashes, + self.definition, + self.definition._hashes, ) + def _write_notebook(self): + """Writes a notebook into the""" + raise NotImplementedError + SUPPORTED_COPY_MODES = FileSet.CopyMode.any DEFAULT_COPY_COLLATION = FileSet.CopyCollation.any -def _sanitize_spec( - spec: ty.Union[ - SpecInfo, ty.List[str], ty.Dict[str, ty.Type[ty.Any]], BaseSpec, None - ], - wf_name: str, - spec_name: str, - allow_empty: bool = False, -) -> SpecInfo: - """Makes sure the provided input specifications are valid. +logger = logging.getLogger("pydra") + +OutputsType = ty.TypeVar("OutputType", bound=TaskOutputs) +WorkflowOutputsType = ty.TypeVar("OutputType", bound=WorkflowOutputs) + - If the input specification is a list of strings, this will - build a proper SpecInfo object out of it. +@attrs.define(auto_attribs=False) +class Workflow(ty.Generic[WorkflowOutputsType]): + """A workflow, constructed from a workflow definition Parameters ---------- - spec : SpecInfo or List[str] or Dict[str, type] - Specification to be sanitized. - wf_name : str - The name of the workflow for which the input specifications - spec_name : str - name given to generated SpecInfo object - - Returns - ------- - spec : SpecInfo - Sanitized specification. - - Raises - ------ - ValueError - If provided `spec` is None. 
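# The unstable-hash message above points to pydra/utils/hash.py: a type whose default
# hash drifts between calls (for example because of an unordered or stochastic
# attribute) can provide its own byte-level representation built only from its stable
# attributes. A hedged sketch, assuming the hook is a "__bytes_repr__" generator that
# receives a hashing cache and yields bytes; check pydra/utils/hash.py for the exact
# signature expected by this version.
class TrackedValue:
    def __init__(self, value: float, label: str):
        self.value = value
        self.label = label
        self._scratch = object()  # unstable attribute that must not influence the hash

    def __bytes_repr__(self, cache):
        # yield only the stable attributes, in a fixed order
        yield self.label.encode()
        yield repr(self.value).encode()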
+ name : str + The name of the workflow + inputs : TaskDef + The input definition of the workflow + outputs : TaskDef + The output definition of the workflow """ - graph_checksum_input = ("_graph_checksums", ty.Any) - if spec: - if isinstance(spec, SpecInfo): - if BaseSpec not in spec.bases: - raise ValueError("Provided SpecInfo must have BaseSpec as its base.") - if "_graph_checksums" not in {f[0] for f in spec.fields}: - spec.fields.insert(0, graph_checksum_input) - return spec + + name: str = attrs.field() + inputs: WorkflowDef[WorkflowOutputsType] = attrs.field() + outputs: WorkflowOutputsType = attrs.field() + _nodes: dict[str, Node] = attrs.field(factory=dict) + + def __repr__(self): + return f"Workflow(name={self.name!r}, defn={self.inputs!r})" + + @classmethod + def clear_cache( + cls, definition: WorkflowDef[WorkflowOutputsType] | None = None + ) -> None: + """Clear the cache of constructed workflows""" + if definition is None: + cls._constructed_cache = defaultdict(lambda: defaultdict(dict)) else: - base = BaseSpec - if isinstance(spec, list): - typed_spec = zip(spec, itertools.repeat(ty.Any)) - elif isinstance(spec, dict): - typed_spec = spec.items() # type: ignore - elif isinstance(spec, BaseSpec): - base = spec - typed_spec = [] - else: - raise TypeError( - f"Unrecognised spec type, {spec}, should be SpecInfo, list or dict" - ) - return SpecInfo( - name=spec_name, - fields=[graph_checksum_input] - + [ - ( - nm, - attr.ib( - type=tp, - metadata={ - "help_string": f"{nm} input from {wf_name} workflow" - }, - ), - ) - for nm, tp in typed_spec - ], - bases=(base,), - ) - elif allow_empty: - return None - else: - raise ValueError(f'Empty "{spec_name}" spec provided to Workflow {wf_name}.') + cls._constructed_cache[hash_function(definition)] = defaultdict(dict) + @classmethod + def construct( + cls, definition: WorkflowDef[WorkflowOutputsType], dont_cache: bool = False + ) -> Self: + """Construct a workflow from a definition, caching the constructed worklow""" -class Workflow(TaskBase): - """A composite task with structure of computational graph.""" + # Check the previously constructed workflows to see if a workflow has been + # constructed for the given set of inputs, or a less-specific set (i.e. 
with a + # super-set of lazy inputs), and use that if it exists - def __init__( + non_lazy_vals = { + n: v for n, v in attrs_values(definition).items() if not is_lazy(v) + } + non_lazy_keys = frozenset(non_lazy_vals) + hash_cache = Cache() # share the hash cache to avoid recalculations + non_lazy_hash = hash_function(non_lazy_vals, cache=hash_cache) + defn_hash = hash_function(type(definition), cache=hash_cache) + # Check for same non-lazy inputs + try: + defn_cache = cls._constructed_cache[defn_hash] + except KeyError: + pass + else: + if ( + non_lazy_keys in defn_cache + and non_lazy_hash in defn_cache[non_lazy_keys] + ): + return defn_cache[non_lazy_keys][non_lazy_hash] + # Check for supersets of lazy inputs + for key_set, key_set_cache in defn_cache.items(): + if key_set.issubset(non_lazy_keys): + subset_vals = { + k: v for k, v in non_lazy_vals.items() if k in key_set + } + subset_hash = hash_function(subset_vals, cache=hash_cache) + if subset_hash in key_set_cache: + return key_set_cache[subset_hash] + + # Initialise the outputs of the workflow + outputs = definition.Outputs( + **{f.name: attrs.NOTHING for f in attrs.fields(definition.Outputs)} + ) + + # Initialise the lzin fields + lazy_spec = copy(definition) + workflow = Workflow( + name=type(definition).__name__, + inputs=lazy_spec, + outputs=outputs, + ) + # Set lazy inputs to the workflow, need to do it after the workflow is initialised + # so a back ref to the workflow can be set in the lazy field + for field in list_fields(definition): + if field.name not in non_lazy_keys: + setattr( + lazy_spec, + field.name, + LazyInField( + workflow=workflow, + field=field.name, + type=field.type, + ), + ) + + input_values = attrs_values(lazy_spec) + constructor = input_values.pop("constructor") + # Call the user defined constructor to set the outputs + output_lazy_fields = constructor(**input_values) + # Check to see whether any mandatory inputs are not set + for node in workflow.nodes: + node._definition._check_rules() + # Check that the outputs are set correctly, either directly by the constructor + # or via returned values that can be zipped with the output names + if output_lazy_fields: + if not isinstance(output_lazy_fields, (list, tuple)): + output_lazy_fields = [output_lazy_fields] + output_fields = list_fields(definition.Outputs) + if len(output_lazy_fields) != len(output_fields): + raise ValueError( + f"Expected {len(output_fields)} outputs, got " + f"{len(output_lazy_fields)} ({output_lazy_fields})" + ) + for outpt, outpt_lf in zip(output_fields, output_lazy_fields): + # Automatically combine any uncombined state arrays into a single lists + outpt_lf._type = State.combine_state_arrays(outpt_lf._type) + setattr(outputs, outpt.name, outpt_lf) + else: + if unset_outputs := [ + a for a, v in attrs_values(outputs).items() if v is attrs.NOTHING + ]: + raise ValueError( + f"Expected outputs {unset_outputs} to be set by the " + f"constructor of {workflow!r}" + ) + if not dont_cache: + cls._constructed_cache[defn_hash][non_lazy_keys][non_lazy_hash] = workflow + + return workflow + + @classmethod + def under_construction(cls) -> "Workflow[ty.Any]": + """Access the under_construction variable by iterating up through the call stack.""" + frame = inspect.currentframe() + while frame: + # Find the frame where the construct method was called + if ( + frame.f_code.co_name == "construct" + and frame.f_locals.get("cls") is cls + and "workflow" in frame.f_locals + ): + return frame.f_locals["workflow"] # local var "workflow" in construct + frame = 
frame.f_back + raise RuntimeError( + "No workflow is currently under construction (i.e. did not find a " + "`Workflow.construct` in the current call stack" + ) + + def add( self, - name, - audit_flags: AuditFlag = AuditFlag.NONE, - cache_dir=None, - cache_locations=None, - input_spec: ty.Optional[ - ty.Union[ty.List[ty.Text], ty.Dict[ty.Text, ty.Type[ty.Any]], SpecInfo] - ] = None, - cont_dim=None, - messenger_args=None, - messengers=None, - output_spec: ty.Optional[ - ty.Union[ty.List[str], ty.Dict[str, type], SpecInfo, BaseSpec] - ] = None, - rerun=False, - propagate_rerun=True, - **kwargs, - ): - """ - Initialize a workflow. + task_def: TaskDef[OutputsType], + name: str | None = None, + environment: Environment | None = None, + hooks: TaskHooks | None = None, + ) -> OutputsType: + """Add a node to the workflow Parameters ---------- - name : :obj:`str` - Unique name of this node - audit_flags : :class:`AuditFlag`, optional - Configure provenance tracking. Default is no provenance tracking. - See available flags at :class:`~pydra.utils.messenger.AuditFlag`. - cache_dir : :obj:`os.pathlike` - Set a custom directory of previously computed nodes. - cache_locations : - TODO - inputs : :obj:`typing.Text`, or :class:`File`, or :obj:`dict`, or `None`. - Set particular inputs to this node. - cont_dim : :obj:`dict`, or `None` - Container dimensions for input fields, - if any of the container should be treated as a container - messenger_args : - TODO - messengers : - TODO - output_spec : - TODO + task_spec : TaskDef + The definition of the task to add to the workflow as a node + name : str, optional + The name of the node, by default it will be the name of the task definition + class + environment : Environment, optional + The environment to run the task in, such as the Docker or Singularity container, + by default it will be the "native" + hooks : TaskHooks, optional + The hooks to run before or after the task, by default no hooks will be run + Returns + ------- + OutputType + The outputs definition of the node """ - self.input_spec = _sanitize_spec(input_spec, name, "Inputs") - self.output_spec = _sanitize_spec( - output_spec, name, "Outputs", allow_empty=True - ) + from pydra.engine.environments import Native - if name in dir(self): + if name is None: + name = type(task_def).__name__ + if name in self._nodes: + raise ValueError(f"Node with name {name!r} already exists in the workflow") + if ( + environment + and not isinstance(environment, Native) + and task_def._task_type != "shell" + ): raise ValueError( - "Cannot use names of attributes or methods as workflow name" + "Environments can only be used with 'shell' tasks not " + f"{task_def._task_type!r} tasks ({task_def!r})" ) - self.name = name - - super().__init__( + node = Node[OutputsType]( name=name, - inputs=kwargs, - cont_dim=cont_dim, - cache_dir=cache_dir, - cache_locations=cache_locations, - audit_flags=audit_flags, - messengers=messengers, - messenger_args=messenger_args, - rerun=rerun, - ) - - self.graph = DiGraph(name=name) - self.name2obj = {} - self._lzin = None - self._pre_split = ( - False # To signify if the workflow has been split on task load or not + definition=task_def, + workflow=self, + environment=environment, + hooks=hooks, ) + self._nodes[name] = node + return node.lzout - # store output connections - self._connections = None - # propagating rerun if task_rerun=True - self.propagate_rerun = propagate_rerun - - @cached_property - def lzin(self): - return LazyIn(self) - - def __getattr__(self, name): - if name in 
self.name2obj: - return self.name2obj[name] - return self.__getattribute__(name) + def __getitem__(self, key: str) -> Node: + return self._nodes[key] @property - def nodes(self): - """Get the list of node names.""" - return self.name2obj.values() + def nodes(self) -> ty.Iterable[Node]: + return self._nodes.values() @property - def graph_sorted(self): - """Get a sorted graph representation of the workflow.""" - return self.graph.sorted_nodes + def node_names(self) -> list[str]: + return list(self._nodes) - @property - def checksum(self): - """Calculates the unique checksum of the task. - Used to create specific directory name for task that are run; - and to create nodes checksums needed for graph checksums - (before the tasks have inputs etc.) - """ - # if checksum is called before run the _graph_checksums is not ready - if is_workflow(self) and self.inputs._graph_checksums is attr.NOTHING: - self.inputs._graph_checksums = { - nd.name: nd.checksum for nd in self.graph_sorted - } + # Used to cache the constructed workflows by their hashed input values + _constructed_cache: dict[ + str, dict[frozenset[str], dict[str, "Workflow[ty.Any]"]] + ] = defaultdict(lambda: defaultdict(dict)) - input_hash = self.inputs.hash - if not self.state: - self._checksum = create_checksum( - self.__class__.__name__, self._checksum_wf(input_hash) - ) - else: - self._checksum = create_checksum( - self.__class__.__name__, - self._checksum_wf(input_hash, with_splitter=True), - ) - return self._checksum + def execution_graph(self, submitter: "Submitter") -> DiGraph: + from pydra.engine.submitter import NodeExecution - def _checksum_wf(self, input_hash, with_splitter=False): - """creating hash value for workflows - includes connections and splitter if with_splitter is True - """ - connection_hash = hash_function(self._connections) - hash_list = [input_hash, connection_hash] - if with_splitter and self.state: - # including splitter in the hash - splitter_hash = hash_function(self.state.splitter) - hash_list.append(splitter_hash) - return hash_function(hash_list) - - def add(self, task): - """ - Add a task to the workflow. + exec_nodes = [NodeExecution(n, submitter, workflow=self) for n in self.nodes] + graph = self._create_graph(exec_nodes) + # Set the graph attribute of the nodes so lazy fields can be resolved as tasks + # are created + for node in exec_nodes: + node.graph = graph + return graph - Parameters - ---------- - task : :class:`TaskBase` - The task to be added. + def graph(self, detailed: bool = False) -> DiGraph: + return self._create_graph(self.nodes, detailed=detailed) + def _create_graph( + self, nodes: "list[Node | NodeExecution]", detailed: bool = False + ) -> DiGraph: """ - if task.name in dir(self): - raise ValueError( - "Cannot use names of workflow attributes or methods as task name" - ) - if task.name in self.name2obj: - raise ValueError( - "Another task named {} is already added to the workflow".format( - task.name - ) - ) - self.name2obj[task.name] = task - - if not is_task(task): - raise ValueError(f"Unknown workflow element: {task!r}") - self.graph.add_nodes(task) - self._last_added = task - logger.debug(f"Added {task}") - return self - - def create_connections(self, task, detailed=False): - """ - Add and connect a particular task to existing nodes in the workflow. + Connects a particular task to existing nodes in the workflow. Parameters ---------- - task : :class:`TaskBase` - The task to be added. 
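# The add()/under_construction() pair above is what the decorator-based API drives:
# calling add() inside a workflow constructor registers a Node and hands back its lazy
# outputs, which can be fed into later nodes or returned as workflow outputs (as
# checked by construct() above). A hedged usage sketch, assuming the pydra.design
# helpers python.define / workflow.define / workflow.add and the default single-output
# name "out"; the exact spellings should be checked against the new-docs tutorials.
from pydra.design import python, workflow


@python.define
def Add(a: int, b: int) -> int:
    return a + b


@workflow.define
def Sum3(x: int, y: int, z: int) -> int:
    first = workflow.add(Add(a=x, b=y), name="first")  # becomes a Node, returns lzout
    second = workflow.add(Add(a=first.out, b=z))  # name defaults to the class name
    return second.out  # zipped with the workflow's output fields by construct()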
- detailed : :obj:`bool` - If True, `add_edges_description` is run for self.graph to add - a detailed descriptions of the connections (input/output fields names) + detailed : bool + If True, `add_edges_description` is run a detailed descriptions of the + connections (input/output fields names) + node_klass : type, optional + The class to use for the nodes in the workflow. If provided the node is + wrapped by an instance of the class, if None the node is added as is, + by default None + + Returns + ------- + DiGraph + The graph of the workflow """ + graph: DiGraph = DiGraph() + for node in nodes: + graph.add_nodes(node) # TODO: create connection is run twice - other_states = {} - for field in attr_fields(task.inputs): - val = getattr(task.inputs, field.name) - if isinstance(val, LazyField): - # saving all connections with LazyFields - task.inp_lf[field.name] = val - # adding an edge to the graph if task id expecting output from a different task - if val.name != self.name: - # checking if the connection is already in the graph - if (getattr(self, val.name), task) not in self.graph.edges: - self.graph.add_edges((getattr(self, val.name), task)) - if detailed: - self.graph.add_edges_description( - (task.name, field.name, val.name, val.field) - ) - logger.debug("Connecting %s to %s", val.name, task.name) - # adding a state from the previous task to other_states - if ( - getattr(self, val.name).state - and getattr(self, val.name).state.splitter_rpn_final - ): - # variables that are part of inner splitters should be treated as a containers + for node in nodes: + other_states = {} + for field in attrs_fields(node.inputs): + lf = node._definition[field.name] + if isinstance(lf, LazyOutField): + # adding an edge to the graph if task id expecting output from a different task + if lf._node.name != self.name: + # checking if the connection is already in the graph + if (graph.node(lf._node.name), node) not in graph.edges: + graph.add_edges((graph.node(lf._node.name), node)) + if detailed: + graph.add_edges_description( + (node.name, field.name, lf._node.name, lf._field) + ) + logger.debug("Connecting %s to %s", lf._node.name, node.name) + # adding a state from the previous task to other_states if ( - task.state - and f"{task.name}.{field.name}" in task.state.splitter + graph.node(lf._node.name).state + and graph.node(lf._node.name).state.splitter_rpn_final ): - task._inner_cont_dim[f"{task.name}.{field.name}"] = 1 - # adding task_name: (task.state, [a field from the connection] - if val.name not in other_states: - other_states[val.name] = ( - getattr(self, val.name).state, - [field.name], + # variables that are part of inner splitters should be + # treated as a containers + if ( + node.state + and f"{node.name}.{field.name}" in node.state.splitter + ): + node.state._inner_cont_dim[ + f"{node.name}.{field.name}" + ] = 1 + # adding task_name: (task.state, [a field from the connection] + if lf._node.name not in other_states: + other_states[lf._node.name] = ( + graph.node(lf._node.name).state, + [field.name], + ) + else: + # if the task already exist in other_state, + # additional field name should be added to the list of fields + other_states[lf._node.name][1].append(field.name) + else: # LazyField with the wf input + # connections with wf input should be added to the detailed graph description + if detailed: + graph.add_edges_description( + (node.name, field.name, lf._node.name, lf._field) ) - else: - # if the task already exist in other_state, - # additional field name should be added to the list of 
fields - other_states[val.name][1].append(field.name) - else: # LazyField with the wf input - # connections with wf input should be added to the detailed graph description - if detailed: - self.graph.add_edges_description( - (task.name, field.name, val.name, val.field) - ) - - # if task has connections state has to be recalculated - if other_states: - if hasattr(task, "fut_combiner"): - combiner = task.fut_combiner - else: - combiner = None - - if task.state: - task.state.update_connections( - new_other_states=other_states, new_combiner=combiner - ) - else: - task.state = state.State( - task.name, - splitter=None, - other_states=other_states, - combiner=combiner, - ) - - async def _run(self, submitter=None, rerun=False, **kwargs): - # output_spec needs to be set using set_output or at workflow initialization - if self.output_spec is None: - raise ValueError( - "Workflow output cannot be None, use set_output to define output(s)" - ) - # creating connections that were defined after adding tasks to the wf - self._connect_and_propagate_to_tasks( - propagate_rerun=self.task_rerun and self.propagate_rerun - ) - - checksum = self.checksum - output_dir = self.output_dir - lockfile = self.cache_dir / (checksum + ".lock") - self.hooks.pre_run(self) - logger.debug( - "'%s' is attempting to acquire lock on %s with Pydra lock", - self.name, - lockfile, - ) - async with PydraFileLock(lockfile): - if not (rerun or self.task_rerun): - result = self.result() - if result is not None and not result.errored: - return result - cwd = os.getcwd() - self._populate_filesystem(checksum, output_dir) - result = Result(output=None, runtime=None, errored=False) - self.hooks.pre_run_task(self) - self.audit.start_audit(odir=output_dir) - try: - self.audit.monitor() - await self._run_task(submitter, rerun=rerun) - result.output = self._collect_outputs() - except Exception: - etype, eval, etr = sys.exc_info() - traceback = format_exception(etype, eval, etr) - record_error(output_dir, error=traceback) - result.errored = True - self._errored = True - raise - finally: - self.hooks.post_run_task(self, result) - self.audit.finalize_audit(result=result) - save(output_dir, result=result, task=self) - # removing the additional file with the checksum - (self.cache_dir / f"{self.uid}_info.json").unlink() - os.chdir(cwd) - self.hooks.post_run(self, result) - # Check for any changes to the input hashes that have occurred during the execution - # of the task - self._check_for_hash_changes() - return result - async def _run_task(self, submitter, rerun=False, environment=None): - if not submitter: - raise Exception("Submitter should already be set.") - for nd in self.graph.nodes: - if nd.allow_cache_override: - nd.cache_dir = self.cache_dir - # at this point Workflow is stateless so this should be fine - await submitter.expand_workflow(self, rerun=rerun) - - def set_output( - self, - connections: ty.Union[ - ty.Tuple[str, LazyField], ty.List[ty.Tuple[str, LazyField]] - ], - ): - """ - Set outputs of the workflow by linking them with lazy outputs of tasks - - Parameters - ---------- - connections : tuple[str, LazyField] or list[tuple[str, LazyField]] or None - single or list of tuples linking the name of the output to a lazy output - of a task in the workflow. 
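# _create_graph() above derives the workflow edges purely from where each node's
# inputs point: any input holding a lazy reference to another node's output becomes a
# directed edge from that upstream node to the current one. The same idea in a toy
# form; NodeSpec and edges_from_lazy_inputs are hypothetical stand-ins, not pydra
# classes.
from dataclasses import dataclass, field


@dataclass
class NodeSpec:
    name: str
    # input name -> name of the upstream node it is lazily connected to (or None)
    inputs: dict[str, str | None] = field(default_factory=dict)


def edges_from_lazy_inputs(nodes: list[NodeSpec]) -> set[tuple[str, str]]:
    edges = set()
    for node in nodes:
        for upstream in node.inputs.values():
            if upstream is not None:  # a lazy reference to another node's output
                edges.add((upstream, node.name))
    return edges


# edges_from_lazy_inputs([NodeSpec("a"), NodeSpec("b", {"x": "a"})]) -> {("a", "b")}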
- """ - from ..utils.typing import TypeParser - - if self._connections is None: - self._connections = [] - if isinstance(connections, tuple) and len(connections) == 2: - new_connections = [connections] - elif isinstance(connections, list) and all( - [len(el) == 2 for el in connections] - ): - new_connections = connections - elif isinstance(connections, dict): - new_connections = list(connections.items()) - else: - raise TypeError( - "Connections can be a 2-elements tuple, a list of these tuples, or dictionary" - ) - # checking if a new output name is already in the connections - connection_names = [name for name, _ in self._connections] - if self.output_spec: - output_types = { - a.name: a.type for a in attr.fields(make_klass(self.output_spec)) - } - else: - output_types = {} - # Check for type matches with explicitly defined outputs - conflicting = [] - type_mismatches = [] - for conn_name, lazy_field in new_connections: - if conn_name in connection_names: - conflicting.append(conn_name) - try: - output_type = output_types[conn_name] - except KeyError: - pass - else: - if not TypeParser.matches_type(lazy_field.type, output_type): - type_mismatches.append((conn_name, output_type, lazy_field.type)) - if conflicting: - raise ValueError(f"the output names {conflicting} are already set") - if type_mismatches: - raise TypeError( - f"the types of the following outputs of {self} don't match their declared types: " - + ", ".join( - f"{n} (expected: {ex}, provided: {p})" - for n, ex, p in type_mismatches - ) - ) - self._connections += new_connections - fields = [] - for con in self._connections: - wf_out_nm, lf = con - task_nm, task_out_nm = lf.name, lf.field - if task_out_nm == "all_": - help_string = f"all outputs from {task_nm}" - fields.append((wf_out_nm, dict, {"help_string": help_string})) - else: - from ..utils.typing import TypeParser - - # getting information about the output field from the task output_spec - # providing proper type and some help string - task_output_spec = getattr(self, task_nm).output_spec - out_fld = attr.fields_dict(make_klass(task_output_spec))[task_out_nm] - help_string = ( - f"{out_fld.metadata.get('help_string', '')} (from {task_nm})" - ) - if TypeParser.get_origin(lf.type) is StateArray: - type_ = TypeParser.get_item_type(lf.type) + # if task has connections state has to be recalculated + if other_states: + if hasattr(node, "fut_combiner"): + combiner = node.fut_combiner else: - type_ = lf.type - fields.append((wf_out_nm, type_, {"help_string": help_string})) - self.output_spec = SpecInfo(name="Output", fields=fields, bases=(BaseSpec,)) - logger.info("Added %s to %s", self.output_spec, self) - - def _collect_outputs(self): - output_klass = make_klass(self.output_spec) - output = output_klass( - **{f.name: attr.NOTHING for f in attr.fields(output_klass)} - ) - # collecting outputs from tasks - output_wf = {} - for name, val in self._connections: - if not isinstance(val, LazyField): - raise ValueError("all connections must be lazy") - try: - val_out = val.get_value(self) - output_wf[name] = val_out - except (ValueError, AttributeError) as e: - output_wf[name] = None - # checking if the tasks has predecessors that raises error - if isinstance(getattr(self, val.name)._errored, list): - raise ValueError( - f"Tasks {getattr(self, val.name)._errored} raised an error" + combiner = None + + if node.state: + node.state.update_connections( + new_other_states=other_states, new_combiner=combiner ) else: - if isinstance(getattr(self, val.name).output_dir, list): - err_file = [ 
- el / "_error.pklz" - for el in getattr(self, val.name).output_dir - ] - if not all(e.exists() for e in err_file): - raise e - else: - err_file = getattr(self, val.name).output_dir / "_error.pklz" - if not Path(err_file).exists(): - raise e - raise ValueError( - f"Task {val.name} raised an error, full crash report is here: " - f"{err_file}" + node.state = state.State( + node.name, + splitter=None, + other_states=other_states, + combiner=combiner, ) - return attr.evolve(output, **output_wf) - - def create_dotfile(self, type="simple", export=None, name=None, output_dir=None): - """creating a graph - dotfile and optionally exporting to other formats""" - outdir = output_dir if output_dir is not None else self.cache_dir - if not name: - name = f"graph_{self.name}" - if type == "simple": - for task in self.graph.nodes: - self.create_connections(task) - dotfile = self.graph.create_dotfile_simple(outdir=outdir, name=name) - elif type == "nested": - for task in self.graph.nodes: - self.create_connections(task) - dotfile = self.graph.create_dotfile_nested(outdir=outdir, name=name) - elif type == "detailed": - # create connections with detailed=True - for task in self.graph.nodes: - self.create_connections(task, detailed=True) - # adding wf outputs - for wf_out, lf in self._connections: - self.graph.add_edges_description((self.name, wf_out, lf.name, lf.field)) - dotfile = self.graph.create_dotfile_detailed(outdir=outdir, name=name) - else: - raise Exception( - f"type of the graph can be simple, detailed or nested, " - f"but {type} provided" - ) - if not export: - return dotfile - else: - if export is True: - export = ["png"] - elif isinstance(export, str): - export = [export] - formatted_dot = [] - for ext in export: - formatted_dot.append(self.graph.export_graph(dotfile=dotfile, ext=ext)) - return dotfile, formatted_dot - - def _connect_and_propagate_to_tasks( - self, - *, - propagate_rerun=False, - override_task_caches=False, - ): - """ - Visit each node in the graph and create the connections. - Additionally checks if all tasks should be rerun. - """ - for task in self.graph.nodes: - self.create_connections(task) - # if workflow has task_rerun=True and propagate_rerun=True, - # it should be passed to the tasks - if propagate_rerun: - task.task_rerun = True - # if the task is a wf, than the propagate_rerun should be also set - if is_workflow(task): - task.propagate_rerun = True - - # ported from Submitter.__call__ - # TODO: no prepare state ? 
- if override_task_caches and task.allow_cache_override: - task.cache_dir = self.cache_dir - task.cache_locations = task._cache_locations + self.cache_locations - - -def is_task(obj): - """Check whether an object looks like a task.""" - return hasattr(obj, "_run_task") + return graph def is_workflow(obj): """Check whether an object is a :class:`Workflow` instance.""" - return isinstance(obj, Workflow) + from pydra.engine.specs import WorkflowDef + from pydra.engine.core import Workflow + + return isinstance(obj, (WorkflowDef, Workflow)) -def is_lazy(obj): - """Check whether an object has any field that is a Lazy Field""" - for f in attr_fields(obj): - if isinstance(getattr(obj, f.name), LazyField): +def has_lazy(obj): + """Check whether an object has lazy fields.""" + for f in attrs_fields(obj): + if is_lazy(getattr(obj, f.name)): return True return False diff --git a/pydra/engine/environments.py b/pydra/engine/environments.py index 0c57008058..a13f5bf75f 100644 --- a/pydra/engine/environments.py +++ b/pydra/engine/environments.py @@ -1,6 +1,18 @@ +import typing as ty +import os +from copy import copy from .helpers import execute - from pathlib import Path +import logging +from fileformats.generic import FileSet +from pydra.engine.helpers import list_fields +from pydra.utils.typing import TypeParser + +logger = logging.getLogger("pydra") + +if ty.TYPE_CHECKING: + from pydra.engine.core import Task + from pydra.engine.specs import ShellDef class Environment: @@ -14,7 +26,7 @@ class Environment: def setup(self): pass - def execute(self, task): + def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: """ Execute the task in the environment. @@ -25,7 +37,7 @@ def execute(self, task): Returns ------- - output + output: dict[str, Any] Output of the task. """ raise NotImplementedError @@ -39,12 +51,13 @@ class Native(Environment): Native environment, i.e. the tasks are executed in the current python environment. """ - def execute(self, task): + def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: keys = ["return_code", "stdout", "stderr"] - values = execute(task.command_args(), strip=task.strip) + cmd_args = task.definition._command_args(values=task.inputs) + values = execute(cmd_args) output = dict(zip(keys, values)) if output["return_code"]: - msg = f"Error running '{task.name}' task with {task.command_args()}:" + msg = f"Error running '{task.name}' task with {cmd_args}:" if output["stderr"]: msg += "\n\nstderr:\n" + output["stderr"] if output["stdout"]: @@ -83,20 +96,99 @@ def bind(self, loc, mode="ro"): loc_abs = Path(loc).absolute() return f"{loc_abs}:{self.root}{loc_abs}:{mode}" + def get_bindings( + self, task: "Task", root: str | None = None + ) -> tuple[dict[str, tuple[str, str]], dict[str, tuple[Path, ...]]]: + """Return bindings necessary to run task in an alternative root. + + This is primarily intended for contexts when a task is going + to be run in a container with mounted volumes. 
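# A standalone sketch of the path re-mapping rule get_bindings() applies for container
# runs: every host directory holding an input file is mounted at the same path
# prefixed by the container root, and the command is rewritten to use the in-container
# path. map_input, the default root and the mode handling are hypothetical
# simplifications, not pydra's API.
from pathlib import Path


def map_input(host_file: Path, root: str = "/mnt/pydra", mode: str = "ro"):
    host_dir = host_file.parent.absolute()
    env_dir = Path(f"{root}{host_dir}")  # same layout, re-rooted inside the container
    bind_arg = f"{host_dir}:{env_dir}:{mode}"  # what Container.bind() builds for -v/-B
    return bind_arg, env_dir / host_file.name  # mount spec + rewritten command argument


# map_input(Path("/data/sub-01/T1w.nii.gz"))
# -> ("/data/sub-01:/mnt/pydra/data/sub-01:ro", Path("/mnt/pydra/data/sub-01/T1w.nii.gz"))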
+ + Arguments + --------- + root: str, optional + + + Returns + ------- + bindings: dict + Mapping from paths in the host environment to the target environment + """ + from pydra.design import shell + + bindings: dict[str, tuple[str, str]] = {} + value_updates: dict[str, tuple[Path, ...]] = {} + if root is None: + return bindings + fld: shell.arg + for fld in list_fields(task.definition): + if TypeParser.contains_type(FileSet, fld.type): + value: FileSet | None = task.inputs[fld.name] + if not value: + continue + + copy_file = fld.copy_mode == FileSet.CopyMode.copy + + def map_path(fileset: os.PathLike | FileSet) -> Path: + host_path, env_path = fileset.parent, Path( + f"{root}{fileset.parent}" + ) + + # Default to mounting paths as read-only, but respect existing modes + bindings[host_path] = ( + env_path, + "rw" if copy_file or isinstance(fld, shell.outarg) else "ro", + ) + return ( + env_path / fileset.name + if isinstance(fileset, os.PathLike) + else tuple(env_path / rel for rel in fileset.relative_fspaths) + ) + + # Provide updated in-container paths to the command to be run. If a + # fs-object, which resolves to a single path, just pass in the name of + # that path relative to the location in the mount point in the container. + # If it is a more complex file-set with multiple paths, then it is converted + # into a tuple of paths relative to the base of the fileset. + if TypeParser.matches(value, os.PathLike | FileSet): + value_updates[fld.name] = map_path(value) + elif TypeParser.matches(value, ty.Sequence[FileSet | os.PathLike]): + mapped_value = [] + for val in value: + mapped_val = map_path(val) + if isinstance(mapped_val, tuple): + mapped_value.extend(mapped_val) + else: + mapped_value.append(mapped_val) + value_updates[fld.name] = mapped_value + else: + logger.debug( + "No support for generating bindings for %s types " "(%s)", + type(value), + value, + ) + + # Add the cache directory to the list of mounts + bindings[task.cache_dir] = (f"{self.root}/{task.cache_dir}", "rw") + + # Update values with the new paths + values = copy(task.inputs) + values.update(value_updates) + + return bindings, values + class Docker(Container): """Docker environment.""" - def execute(self, task): + def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: docker_img = f"{self.image}:{self.tag}" # mounting all input locations - mounts = task.get_bindings(root=self.root) + mounts, values = self.get_bindings(task=task, root=self.root) docker_args = [ "docker", "run", - "-v", - self.bind(task.cache_dir, "rw"), *self.xargs, ] docker_args.extend( @@ -108,8 +200,7 @@ def execute(self, task): keys = ["return_code", "stdout", "stderr"] values = execute( - docker_args + [docker_img] + task.command_args(root=self.root), - strip=task.strip, + docker_args + [docker_img] + task.definition._command_args(values=values), ) output = dict(zip(keys, values)) if output["return_code"]: @@ -123,17 +214,15 @@ def execute(self, task): class Singularity(Container): """Singularity environment.""" - def execute(self, task): + def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: singularity_img = f"{self.image}:{self.tag}" # mounting all input locations - mounts = task.get_bindings(root=self.root) + mounts, values = self.get_bindings(task=task, root=self.root) # todo adding xargsy etc singularity_args = [ "singularity", "exec", - "-B", - self.bind(task.cache_dir, "rw"), *self.xargs, ] singularity_args.extend( @@ -145,8 +234,9 @@ def execute(self, task): keys = ["return_code", "stdout", "stderr"] values = 
execute( - singularity_args + [singularity_img] + task.command_args(root=self.root), - strip=task.strip, + singularity_args + + [singularity_img] + + task.definition._command_args(values=values), ) output = dict(zip(keys, values)) if output["return_code"]: diff --git a/pydra/engine/graph.py b/pydra/engine/graph.py index bfa62e0764..447605955f 100644 --- a/pydra/engine/graph.py +++ b/pydra/engine/graph.py @@ -2,15 +2,28 @@ from copy import copy from pathlib import Path +import typing as ty import subprocess as sp from .helpers import ensure_list -class DiGraph: +NodeType = ty.TypeVar("NodeType") + + +class DiGraph(ty.Generic[NodeType]): """A simple Directed Graph object.""" - def __init__(self, name=None, nodes=None, edges=None): + name: str + nodes: list[NodeType] + edges: list[tuple[NodeType, NodeType]] + + def __init__( + self, + name: str | None = None, + nodes: ty.Iterable[NodeType] | None = None, + edges: ty.Iterable[tuple[NodeType, NodeType]] | None = None, + ): """ Initialize a directed graph. @@ -32,6 +45,7 @@ def __init__(self, name=None, nodes=None, edges=None): self._sorted_nodes = None self._node_wip = [] self._nodes_details = {} + self._node_lookup = {} def copy(self): """ @@ -59,20 +73,31 @@ def copy(self): return new_graph @property - def nodes(self): + def nodes(self) -> list[NodeType]: """Get a list of the nodes currently contained in the graph.""" return self._nodes @nodes.setter - def nodes(self, nodes): + def nodes(self, nodes: ty.Iterable[NodeType]) -> None: if nodes: nodes = ensure_list(nodes) - if len(set(nodes)) != len(nodes): - raise Exception("nodes have repeated elements") + # if len(set(nodes)) != len(nodes): + # raise Exception("nodes have repeated elements") self._nodes = nodes + def node(self, name: str) -> NodeType: + """Get a node by its name, caching the lookup directory""" + try: + return self._node_lookup[name] + except KeyError: + self._node_lookup = self.nodes_names_map + try: + return self._node_lookup[name] + except KeyError: + raise KeyError(f"Node {name!r} not found in graph") from None + @property - def nodes_names_map(self): + def nodes_names_map(self) -> dict[str, NodeType]: """Get a map of node names to nodes.""" return {nd.name: nd for nd in self.nodes} @@ -257,6 +282,8 @@ def remove_nodes(self, nodes, check_ready=True): self._sorted_nodes.remove(nd) # starting from the previous sorted list, so is faster self.sorting(presorted=self.sorted_nodes) + # Reset the node lookup + self._node_lookup = {} def remove_nodes_connections(self, nodes): """ @@ -278,6 +305,8 @@ def remove_nodes_connections(self, nodes): self.successors.pop(nd.name) self.predecessors.pop(nd.name) self._node_wip.remove(nd) + # Reset the node lookup + self._node_lookup = {} def remove_previous_connections(self, nodes): """ @@ -300,6 +329,8 @@ def remove_previous_connections(self, nodes): self.successors.pop(nd.name) self.predecessors.pop(nd.name) self._node_wip.remove(nd) + # Reset the node lookup + self._node_lookup = {} def _checking_successors_nodes(self, node, remove=True): if self.successors[node.name]: @@ -309,6 +340,12 @@ def _checking_successors_nodes(self, node, remove=True): else: return True + def successors_nodes(self, node): + """Get all the nodes that follow the node""" + self._successors_all = [] + self._checking_successors_nodes(node=node, remove=False) + return set(self._successors_all) + def remove_successors_nodes(self, node): """Removing all the nodes that follow the node""" self._successors_all = [] diff --git a/pydra/engine/helpers.py 
b/pydra/engine/helpers.py index e6eaa012ef..6e94089499 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -4,70 +4,122 @@ import asyncio.subprocess as asp from pathlib import Path import os +import inspect import sys -from uuid import uuid4 import getpass import typing as ty import subprocess as sp import re from time import strftime from traceback import format_exception -import attr -import attrs # New defaults +import attrs from filelock import SoftFileLock, Timeout import cloudpickle as cp -from .specs import ( - Runtime, - attr_fields, - Result, - LazyField, - File, -) -from .helpers_file import copy_nested_files -from ..utils.typing import TypeParser from fileformats.core import FileSet -from .specs import MultiInputFile, MultiInputObj, MultiOutputObj, MultiOutputFile +from pydra.utils.typing import StateArray -def ensure_list(obj, tuple2list=False): - """ - Return a list whatever the input object is. +if ty.TYPE_CHECKING: + from .specs import TaskDef, Result, WorkflowOutputs, WorkflowDef + from .core import Task + from pydra.design.base import Field + from pydra.engine.lazy import LazyField - Examples - -------- - >>> ensure_list(list("abc")) - ['a', 'b', 'c'] - >>> ensure_list("abc") - ['abc'] - >>> ensure_list(tuple("abc")) - [('a', 'b', 'c')] - >>> ensure_list(tuple("abc"), tuple2list=True) - ['a', 'b', 'c'] - >>> ensure_list(None) - [] - >>> ensure_list(5.0) - [5.0] - """ - if obj is attr.NOTHING: - return attr.NOTHING - if obj is None: +PYDRA_ATTR_METADATA = "__PYDRA_METADATA__" + +DefType = ty.TypeVar("DefType", bound="TaskDef") + + +def plot_workflow( + workflow_task: "WorkflowDef", + out_dir: Path, + type="simple", + export=None, + name=None, + output_dir=None, +): + """creating a graph - dotfile and optionally exporting to other formats""" + from .core import Workflow + + # Create output directory + out_dir.mkdir(parents=True, exist_ok=True) + + # Construct the workflow object + wf = Workflow.construct(workflow_task) + + if not name: + name = f"graph_{type(workflow_task).__name__}" + if type == "simple": + graph = wf.graph() + dotfile = graph.create_dotfile_simple(outdir=out_dir, name=name) + elif type == "nested": + graph = wf.graph() + dotfile = graph.create_dotfile_nested(outdir=out_dir, name=name) + elif type == "detailed": + graph = wf.graph(detailed=True) + dotfile = graph.create_dotfile_detailed(outdir=out_dir, name=name) + else: + raise Exception( + f"type of the graph can be simple, detailed or nested, " + f"but {type} provided" + ) + if not export: + return dotfile + else: + if export is True: + export = ["png"] + elif isinstance(export, str): + export = [export] + formatted_dot = [] + for ext in export: + formatted_dot.append(graph.export_graph(dotfile=dotfile, ext=ext)) + return dotfile, formatted_dot + + +def attrs_fields(definition, exclude_names=()) -> list[attrs.Attribute]: + """Get the fields of a definition, excluding some names.""" + return [ + field for field in definition.__attrs_attrs__ if field.name not in exclude_names + ] + + +def attrs_values(obj, **kwargs) -> dict[str, ty.Any]: + """Get the values of an attrs object.""" + return { + n: v + for n, v in attrs.asdict(obj, recurse=False, **kwargs).items() + if not n.startswith("_") + } + + +def list_fields(definition: "type[TaskDef] | TaskDef") -> list["Field"]: + """List the fields of a task definition""" + if not inspect.isclass(definition): + definition = type(definition) + if not attrs.has(definition): return [] - # list or numpy.array (this might need some extra flag in case an 
array has to be converted) - elif isinstance(obj, list) or hasattr(obj, "__array__"): - return obj - elif tuple2list and isinstance(obj, tuple): - return list(obj) - elif isinstance(obj, LazyField): - return obj - return [obj] + return [ + f.metadata[PYDRA_ATTR_METADATA] + for f in attrs.fields(definition) + if PYDRA_ATTR_METADATA in f.metadata + ] + + +def fields_dict(definition: "type[TaskDef] | TaskDef") -> dict[str, "Field"]: + """Returns the fields of a definition in a dictionary""" + return {f.name: f for f in list_fields(definition)} + + +# from .specs import MultiInputFile, MultiInputObj, MultiOutputObj, MultiOutputFile -def from_list_if_single(obj): +def from_list_if_single(obj: ty.Any) -> ty.Any: """Converts a list to a single item if it is of length == 1""" - if obj is attr.NOTHING: + + if obj is attrs.NOTHING: return obj - if isinstance(obj, LazyField): + if is_lazy(obj): return obj obj = list(obj) if len(obj) == 1: @@ -75,25 +127,30 @@ def from_list_if_single(obj): return obj -def print_help(obj): +def print_help(defn: "TaskDef[DefType]") -> list[str]: """Visit a task object and print its input/output interface.""" - lines = [f"Help for {obj.__class__.__name__}"] - input_klass = make_klass(obj.input_spec) - if attr.fields(input_klass): + from pydra.design.base import NO_DEFAULT + + lines = [f"Help for {defn.__class__.__name__}"] + if list_fields(defn): lines += ["Input Parameters:"] - for f in attr.fields(input_klass): + for f in list_fields(defn): + if (defn._task_type == "python" and f.name == "function") or ( + defn._task_type == "workflow" and f.name == "constructor" + ): + continue default = "" - if f.default != attr.NOTHING and not f.name.startswith("_"): + if f.default is not NO_DEFAULT and not f.name.startswith("_"): default = f" (default: {f.default})" try: name = f.type.__name__ except AttributeError: name = str(f.type) lines += [f"- {f.name}: {name}{default}"] - output_klass = make_klass(obj.output_spec) - if attr.fields(output_klass): + output_klass = defn.Outputs + if list_fields(output_klass): lines += ["Output Parameters:"] - for f in attr.fields(output_klass): + for f in list_fields(output_klass): try: name = f.type.__name__ except AttributeError: @@ -129,7 +186,12 @@ def load_result(checksum, cache_locations): return None -def save(task_path: Path, result=None, task=None, name_prefix=None): +def save( + task_path: Path, + result: "Result | None" = None, + task: "Task[DefType] | None" = None, + name_prefix: str = None, +) -> None: """ Save a :class:`~pydra.engine.core.TaskBase` object and/or results. 
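# print_help() above iterates list_fields(), which recovers pydra's own Field objects
# from the attrs metadata they were stashed in under PYDRA_ATTR_METADATA. The same
# pattern in miniature, with a hypothetical metadata key and plain dicts standing in
# for pydra's Field records.
import attrs

MY_METADATA_KEY = "__MY_FIELD__"


@attrs.define
class Greet:
    name: str = attrs.field(metadata={MY_METADATA_KEY: {"help": "who to greet"}})
    shout: bool = attrs.field(
        default=False, metadata={MY_METADATA_KEY: {"help": "uppercase the greeting?"}}
    )


def my_list_fields(klass):
    return [
        f.metadata[MY_METADATA_KEY]
        for f in attrs.fields(klass)
        if MY_METADATA_KEY in f.metadata
    ]


# my_list_fields(Greet) -> [{'help': 'who to greet'}, {'help': 'uppercase the greeting?'}]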
@@ -142,6 +204,7 @@ def save(task_path: Path, result=None, task=None, name_prefix=None): task : :class:`~pydra.engine.core.TaskBase` Task to pickle and write """ + from pydra.engine.core import is_workflow if task is None and result is None: raise ValueError("Nothing to be saved") @@ -155,9 +218,15 @@ def save(task_path: Path, result=None, task=None, name_prefix=None): lockfile = task_path.parent / (task_path.name + "_save.lock") with SoftFileLock(lockfile): if result: - if task_path.name.startswith("Workflow") and result.output is not None: + if ( + result.definition + and is_workflow(result.definition) + and result.outputs is not None + ): # copy files to the workflow directory - result = copyfile_workflow(wf_path=task_path, result=result) + result.outputs = copyfile_workflow( + wf_path=task_path, outputs=result.outputs + ) with (task_path / f"{name_prefix}_result.pklz").open("wb") as fp: cp.dump(result, fp) if task: @@ -165,15 +234,19 @@ def save(task_path: Path, result=None, task=None, name_prefix=None): cp.dump(task, fp) -def copyfile_workflow(wf_path: os.PathLike, result): +def copyfile_workflow( + wf_path: os.PathLike, outputs: "WorkflowOutputs" +) -> "WorkflowOutputs": """if file in the wf results, the file will be copied to the workflow directory""" - for field in attr_fields(result.output): - value = getattr(result.output, field.name) + from .helpers_file import copy_nested_files + + for field in attrs_fields(outputs): + value = getattr(outputs, field.name) # if the field is a path or it can contain a path _copyfile_single_value is run # to move all files and directories to the workflow directory new_value = copy_nested_files(value, wf_path, mode=FileSet.CopyMode.hardlink) - setattr(result.output, field.name, new_value) - return result + setattr(outputs, field.name, new_value) + return outputs def gather_runtime_info(fname): @@ -191,6 +264,8 @@ def gather_runtime_info(fname): A runtime object containing the collected information. """ + from .specs import Runtime + runtime = Runtime(rss_peak_gb=None, vms_peak_gb=None, cpu_peak_percent=None) # Read .prof file in and set runtime values @@ -214,95 +289,6 @@ def gather_runtime_info(fname): return runtime -def make_klass(spec): - """ - Create a data class given a spec. 
- - Parameters - ---------- - spec : - TODO - - """ - if spec is None: - return None - fields = spec.fields - if fields: - newfields = {} - for item in fields: - if len(item) == 2: - name = item[0] - if isinstance(item[1], attr._make._CountingAttr): - newfield = item[1] - else: - newfield = attr.ib(type=item[1]) - else: - if ( - any([isinstance(ii, attr._make._CountingAttr) for ii in item]) - or len(item) > 4 - ): - raise ValueError( - "syntax not valid, you can use (name, attr), " - "(name, type, default), (name, type, default, metadata)" - "or (name, type, metadata)" - ) - kwargs = {} - if len(item) == 3: - name, tp = item[:2] - if isinstance(item[-1], dict) and "help_string" in item[-1]: - mdata = item[-1] - kwargs["metadata"] = mdata - else: - kwargs["default"] = item[-1] - elif len(item) == 4: - name, tp, dflt, mdata = item - kwargs["default"] = dflt - kwargs["metadata"] = mdata - newfield = attr.ib( - type=tp, - **kwargs, - ) - checker_label = f"'{name}' field of {spec.name}" - type_checker = TypeParser[newfield.type]( - newfield.type, label=checker_label, superclass_auto_cast=True - ) - if newfield.type in (MultiInputObj, MultiInputFile): - converter = attr.converters.pipe(ensure_list, type_checker) - elif newfield.type in (MultiOutputObj, MultiOutputFile): - converter = attr.converters.pipe(from_list_if_single, type_checker) - else: - converter = type_checker - newfield.converter = converter - newfield.on_setattr = attr.setters.convert - if "allowed_values" in newfield.metadata: - if newfield._validator is None: - newfield._validator = allowed_values_validator - elif isinstance(newfield._validator, ty.Iterable): - if allowed_values_validator not in newfield._validator: - newfield._validator.append(allowed_values_validator) - elif newfield._validator is not allowed_values_validator: - newfield._validator = [ - newfield._validator, - allowed_values_validator, - ] - newfields[name] = newfield - fields = newfields - return attrs.make_class( - spec.name, fields, bases=spec.bases, kw_only=True, on_setattr=None - ) - - -def allowed_values_validator(_, attribute, value): - """checking if the values is in allowed_values""" - allowed = attribute.metadata["allowed_values"] - if value is attr.NOTHING or isinstance(value, LazyField): - pass - elif value not in allowed: - raise ValueError( - f"value of {attribute.name} has to be from {allowed}, but {value} provided" - ) - - async def read_stream_and_display(stream, display): """ Read from stream line by line until EOF, display, and capture the lines. @@ -419,7 +405,7 @@ def create_checksum(name, inputs): String of inputs. """ - return "_".join((name, inputs)) + return "-".join((name, inputs)) def record_error(error_path, error): @@ -482,38 +468,6 @@ def get_open_loop(): return loop -def output_from_inputfields(output_spec, input_spec): - """ - Collect values from output from input fields. 
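# record_error() above (and load_and_run() further down) follow a "leave a crash
# report behind, then re-raise" pattern: the traceback is written next to the task's
# output directory and the original exception gains a note pointing at it. A minimal
# standalone sketch; record_crash and run_step are hypothetical names, and
# Exception.add_note requires Python 3.11+.
import traceback
from pathlib import Path


def record_crash(outdir: Path, exc: BaseException) -> Path:
    outdir.mkdir(parents=True, exist_ok=True)
    errorfile = outdir / "_error.txt"  # pydra pickles a dict; plain text keeps this standalone
    errorfile.write_text("".join(traceback.format_exception(exc)))
    return errorfile


def run_step(outdir: Path, step):
    try:
        return step()
    except Exception as e:
        errorfile = record_crash(outdir, e)
        e.add_note(f"full crash report is here: {errorfile}")
        raise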
- If names_only is False, the output_spec is updated, - if names_only is True only the names are returned - - Parameters - ---------- - output_spec : - TODO - input_spec : - TODO - - """ - current_output_spec_names = [f.name for f in attr.fields(make_klass(output_spec))] - new_fields = [] - for fld in attr.fields(make_klass(input_spec)): - if "output_file_template" in fld.metadata: - if "output_field_name" in fld.metadata: - field_name = fld.metadata["output_field_name"] - else: - field_name = fld.name - # not adding if the field already in the output_spec - if field_name not in current_output_spec_names: - # TODO: should probably remove some of the keys - new_fields.append( - (field_name, attr.ib(type=File, metadata=fld.metadata)) - ) - output_spec.fields += new_fields - return output_spec - - def get_available_cpus(): """ Return the number of CPUs available to the current process or, if that is not @@ -542,28 +496,44 @@ def get_available_cpus(): return os.cpu_count() -def load_and_run( - task_pkl, ind=None, rerun=False, submitter=None, plugin=None, **kwargs -): +def load_and_run(task_pkl: Path, rerun: bool = False) -> Path: """ loading a task from a pickle file, settings proper input and running the task + + Parameters + ---------- + task_pkl : :obj:`Path` + The path to pickled task file + + Returns + ------- + resultfile : :obj:`Path` + The path to the pickled result file """ + + from .specs import Result + try: - task = load_task(task_pkl=task_pkl, ind=ind) + task: Task[DefType] = load_task(task_pkl=task_pkl) except Exception: if task_pkl.parent.exists(): etype, eval, etr = sys.exc_info() traceback = format_exception(etype, eval, etr) errorfile = record_error(task_pkl.parent, error=traceback) - result = Result(output=None, runtime=None, errored=True) + result = Result(output=None, runtime=None, errored=True, definition=None) save(task_pkl.parent, result=result) raise resultfile = task.output_dir / "_result.pklz" try: - task(rerun=rerun, plugin=plugin, submitter=submitter, **kwargs) - except Exception as excinfo: + if task.submitter.worker.is_async: + task.submitter.loop.run_until_complete( + task.submitter.worker.run_async(task, rerun=rerun) + ) + else: + task.submitter.worker.run(task, rerun=rerun) + except Exception as e: # creating result and error files if missing errorfile = task.output_dir / "_error.pklz" if not errorfile.exists(): # not sure if this is needed @@ -571,36 +541,27 @@ def load_and_run( traceback = format_exception(etype, eval, etr) errorfile = record_error(task.output_dir, error=traceback) if not resultfile.exists(): # not sure if this is needed - result = Result(output=None, runtime=None, errored=True) + result = Result(output=None, runtime=None, errored=True, definition=None) save(task.output_dir, result=result) - raise type(excinfo)( - str(excinfo.with_traceback(None)), - f" full crash report is here: {errorfile}", - ) + e.add_note(f" full crash report is here: {errorfile}") + raise return resultfile -async def load_and_run_async(task_pkl, ind=None, submitter=None, rerun=False, **kwargs): - """ - loading a task from a pickle file, settings proper input - and running the workflow - """ - task = load_task(task_pkl=task_pkl, ind=ind) - await task._run(submitter=submitter, rerun=rerun, **kwargs) +# async def load_and_run_async(task_pkl): +# """ +# loading a task from a pickle file, settings proper input +# and running the workflow +# """ +# task = load_task(task_pkl=task_pkl) +# await task() -def load_task(task_pkl, ind=None): +def load_task(task_pkl: Path | str) -> 
"Task[DefType]": """loading a task from a pickle file, settings proper input for the specific ind""" if isinstance(task_pkl, str): task_pkl = Path(task_pkl) task = cp.loads(task_pkl.read_bytes()) - if ind is not None: - ind_inputs = task.get_input_el(ind) - task.inputs = attr.evolve(task.inputs, **ind_inputs) - task._pre_split = True - task.state = None - # resetting uid for task - task._uid = uuid4().hex return task @@ -640,42 +601,6 @@ def position_sort(args): return [arg for _, arg in pos] + none + [arg for _, arg in neg] -def argstr_formatting(argstr, inputs, value_updates=None): - """formatting argstr that have form {field_name}, - using values from inputs and updating with value_update if provided - """ - inputs_dict = attr.asdict(inputs, recurse=False) - # if there is a value that has to be updated (e.g. single value from a list) - if value_updates: - inputs_dict.update(value_updates) - # getting all fields that should be formatted, i.e. {field_name}, ... - inp_fields = parse_format_string(argstr) - val_dict = {} - for fld_name in inp_fields: - fld_value = inputs_dict[fld_name] - fld_attr = getattr(attrs.fields(type(inputs)), fld_name) - if fld_value is attr.NOTHING or ( - fld_value is False - and TypeParser.matches_type(fld_attr.type, ty.Union[Path, bool]) - ): - # if value is NOTHING, nothing should be added to the command - val_dict[fld_name] = "" - else: - val_dict[fld_name] = fld_value - - # formatting string based on the val_dict - argstr_formatted = argstr.format(**val_dict) - # removing extra commas and spaces after removing the field that have NOTHING - argstr_formatted = ( - argstr_formatted.replace("[ ", "[") - .replace(" ]", "]") - .replace("[,", "[") - .replace(",]", "]") - .strip() - ) - return argstr_formatted - - class PydraFileLock: """Wrapper for filelock's SoftFileLock that makes it work with asyncio.""" @@ -702,55 +627,94 @@ async def __aexit__(self, exc_type, exc_value, traceback): return None -def parse_copyfile(fld: attr.Attribute, default_collation=FileSet.CopyCollation.any): - """Gets the copy mode from the 'copyfile' value from a field attribute""" - copyfile = fld.metadata.get("copyfile", FileSet.CopyMode.any) - if isinstance(copyfile, tuple): - mode, collation = copyfile - elif isinstance(copyfile, str): - try: - mode, collation = copyfile.split(",") - except ValueError: - mode = copyfile - collation = default_collation - else: - collation = FileSet.CopyCollation[collation] - mode = FileSet.CopyMode[mode] - else: - if copyfile is True: - mode = FileSet.CopyMode.copy - elif copyfile is False: - mode = FileSet.CopyMode.link - elif copyfile is None: - mode = FileSet.CopyMode.any - else: - mode = copyfile - collation = default_collation - if not isinstance(mode, FileSet.CopyMode): - raise TypeError( - f"Unrecognised type for mode copyfile metadata of {fld}, {mode}" - ) - if not isinstance(collation, FileSet.CopyCollation): - raise TypeError( - f"Unrecognised type for collation copyfile metadata of {fld}, {collation}" - ) - return mode, collation - - -def parse_format_string(fmtstr): +def parse_format_string(fmtstr: str) -> set[str]: """Parse a argstr format string and return all keywords used in it.""" identifier = r"[a-zA-Z_]\w*" attribute = rf"\.{identifier}" item = r"\[\w+\]" - # Example: var.attr[key][0].attr2 (capture "var") + # Example: var.attrs[key][0].attr2 (capture "var") field_with_lookups = ( f"({identifier})(?:{attribute}|{item})*" # Capture only the keyword ) conversion = "(?:!r|!s)" nobrace = "[^{}]*" # Example: 0{pads[hex]}x (capture "pads") - 
fmtspec = f"{nobrace}(?:{{({identifier}){nobrace}}}{nobrace})?" # Capture keywords in spec + fmtspec = f"{nobrace}(?:{{({identifier}){nobrace}}}{nobrace})?" # Capture keywords in definition full_field = f"{{{field_with_lookups}{conversion}?(?::{fmtspec})?}}" all_keywords = re.findall(full_field, fmtstr) return set().union(*all_keywords) - {""} + + +def fields_in_formatter(formatter: str | ty.Callable[..., str]) -> set[str]: + """Extract all field names from a formatter string or function.""" + if isinstance(formatter, str): + return parse_format_string(formatter) + else: + return set(inspect.signature(formatter).parameters.keys()) + + +def ensure_list(obj, tuple2list=False): + """ + Return a list whatever the input object is. + + Examples + -------- + >>> ensure_list(list("abc")) + ['a', 'b', 'c'] + >>> ensure_list("abc") + ['abc'] + >>> ensure_list(tuple("abc")) + [('a', 'b', 'c')] + >>> ensure_list(tuple("abc"), tuple2list=True) + ['a', 'b', 'c'] + >>> ensure_list(None) + [] + >>> ensure_list(5.0) + [5.0] + + """ + + if obj is attrs.NOTHING: + return attrs.NOTHING + if obj is None: + return [] + # list or numpy.array (this might need some extra flag in case an array has to be converted) + elif isinstance(obj, list) or hasattr(obj, "__array__"): + return obj + elif tuple2list and isinstance(obj, tuple): + return list(obj) + elif is_lazy(obj): + return obj + return [obj] + + +def is_lazy(obj): + """Check whether an object is a lazy field or has any attribute that is a Lazy Field""" + from pydra.engine.lazy import LazyField + + return isinstance(obj, LazyField) + + +T = ty.TypeVar("T") +U = ty.TypeVar("U") + + +def state_array_support( + function: ty.Callable[T, U], +) -> ty.Callable[T | StateArray[T], U | StateArray[U]]: + """ + Decorator to convert a allow a function to accept and return StateArray objects, + where the function is applied to each element of the StateArray. 
+ """ + + def state_array_wrapper( + value: "T | StateArray[T] | LazyField[T]", + ) -> "U | StateArray[U] | LazyField[U]": + if is_lazy(value): + return value + if isinstance(value, StateArray): + return StateArray(function(v) for v in value) + return function(value) + + return state_array_wrapper diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index f194533ac7..7af9859974 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -8,9 +8,12 @@ from copy import copy import subprocess as sp from contextlib import contextmanager -import attr -from fileformats.core import FileSet +from fileformats.generic import FileSet +from pydra.engine.helpers import is_lazy, attrs_values, list_fields +if ty.TYPE_CHECKING: + from pydra.engine.specs import ShellDef + from pydra.design import shell logger = logging.getLogger("pydra") @@ -72,10 +75,14 @@ def copy_nested_files( **kwargs passed directly onto FileSet.copy() """ - from ..utils.typing import TypeParser # noqa + from pydra.utils.typing import TypeParser # noqa cache: ty.Dict[FileSet, FileSet] = {} + # Set to keep track of file paths that have already been copied + # to allow FileSet.copy to avoid name clashes + clashes_to_avoid = set() + def copy_fileset(fileset: FileSet): try: return cache[fileset] @@ -88,7 +95,15 @@ def copy_fileset(fileset: FileSet): MountIndentifier.on_same_mount(p, dest_dir) for p in fileset.fspaths ): supported -= FileSet.CopyMode.hardlink - copied = fileset.copy(dest_dir=dest_dir, supported_modes=supported, **kwargs) + cp_kwargs = {} + + cp_kwargs.update(kwargs) + copied = fileset.copy( + dest_dir=dest_dir, + supported_modes=supported, + avoid_clashes=clashes_to_avoid, # this prevents fname clashes between filesets + **kwargs, + ) cache[fileset] = copied return copied @@ -96,43 +111,40 @@ def copy_fileset(fileset: FileSet): # not sure if this might be useful for Function Task -def template_update(inputs, output_dir, state_ind=None, map_copyfiles=None): +def template_update( + definition, + output_dir: Path | None = None, + map_copyfiles: dict[str, Path] | None = None, +): """ - Update all templates that are present in the input spec. + Update all templates that are present in the input definition. Should be run when all inputs used in the templates are already set. """ - inputs_dict_st = attr.asdict(inputs, recurse=False) + values = attrs_values(definition) if map_copyfiles is not None: - inputs_dict_st.update(map_copyfiles) + values.update(map_copyfiles) - if state_ind is not None: - for k, v in state_ind.items(): - k = k.split(".")[1] - inputs_dict_st[k] = inputs_dict_st[k][v] - - from .specs import attr_fields + from pydra.design import shell # Collect templated inputs for which all requirements are satisfied. 
fields_templ = [ field - for field in attr_fields(inputs) - if field.metadata.get("output_file_template") - and getattr(inputs, field.name) is not False - and all( - getattr(inputs, required_field) is not attr.NOTHING - for required_field in field.metadata.get("requires", ()) - ) + for field in list_fields(definition) + if isinstance(field, shell.outarg) + and field.path_template + and getattr(definition, field.name) + and all(req.satisfied(definition) for req in field.requires) ] dict_mod = {} for fld in fields_templ: dict_mod[fld.name] = template_update_single( field=fld, - inputs=inputs, - inputs_dict_st=inputs_dict_st, + definition=definition, + values=values, output_dir=output_dir, ) # adding elements from map_copyfiles to fields with templates @@ -142,28 +154,31 @@ def template_update(inputs, output_dir, state_ind=None, map_copyfiles=None): def template_update_single( - field, inputs, inputs_dict_st=None, output_dir=None, spec_type="input" -): + field: "shell.outarg", + definition: "ShellDef", + values: dict[str, ty.Any] = None, + output_dir: Path | None = None, + spec_type: str = "input", +) -> Path | list[Path | None] | None: """Update a single template from the input_spec or output_spec based on the value from inputs_dict (checking the types of the fields, that have "output_file_template)" """ # if input_dict_st with state specific value is not available, # the dictionary will be created from inputs object - from ..utils.typing import TypeParser # noqa - from pydra.engine.specs import LazyField, OUTPUT_TEMPLATE_TYPES + from pydra.utils.typing import TypeParser, OUTPUT_TEMPLATE_TYPES # noqa - if inputs_dict_st is None: - inputs_dict_st = attr.asdict(inputs, recurse=False) + if values is None: + values = attrs_values(definition) if spec_type == "input": - inp_val_set = inputs_dict_st[field.name] - if isinstance(inp_val_set, bool) and field.type in (Path, str): + field_value = values[field.name] + if isinstance(field_value, bool) and field.type in (Path, str): raise TypeError( f"type of '{field.name}' is Path, consider using Union[Path, bool]" ) - if inp_val_set is not attr.NOTHING and not isinstance(inp_val_set, LazyField): - inp_val_set = TypeParser(ty.Union[OUTPUT_TEMPLATE_TYPES])(inp_val_set) + if field_value is not None and not is_lazy(field_value): + field_value = TypeParser(ty.Union[OUTPUT_TEMPLATE_TYPES])(field_value) elif spec_type == "output": if not TypeParser.contains_type(FileSet, field.type): raise TypeError( @@ -174,85 +189,107 @@ def template_update_single( raise TypeError(f"spec_type can be input or output, but {spec_type} provided") # for inputs that the value is set (so the template is ignored) if spec_type == "input": - if isinstance(inp_val_set, (Path, list)): - return inp_val_set - if inp_val_set is False: + if isinstance(field_value, (Path, list)): + return field_value + if field_value is False: # if input fld is set to False, the fld shouldn't be used (setting NOTHING) - return attr.NOTHING + return None # inputs_dict[field.name] is True or spec_type is output - value = _template_formatting(field, inputs, inputs_dict_st) - # changing path so it is in the output_dir - if output_dir and value is not attr.NOTHING: + value = _template_formatting(field, definition, values) + if output_dir and value is not None: + # changing path so it is in the output_dir # should be converted to str, it is also used for input fields that should be str if type(value) is list: - return [str(output_dir / Path(val).name) for val in value] + value = [output_dir / val.name for val in 
value] else: - return str(output_dir / Path(value).name) - else: - return attr.NOTHING + value = output_dir / value.name + return value -def _template_formatting(field, inputs, inputs_dict_st): +def _template_formatting( + field: "shell.arg", definition: "ShellDef", values: dict[str, ty.Any] +) -> Path | list[Path] | None: """Formatting the field template based on the values from inputs. Taking into account that the field with a template can be a MultiOutputFile and the field values needed in the template can be a list - returning a list of formatted templates in that case. Allowing for multiple input values used in the template as longs as there is no more than one file (i.e. File, PathLike or string with extensions) + + Parameters + ---------- + field : pydra.engine.helpers.Field + field with a template + definition : pydra.engine.specs.TaskDef + the task definition + values : dict + dictionary with values from inputs object + + Returns + ------- + formatted : Path or list[Path | None] or None + formatted template """ # if a template is a function it has to be run first with the inputs as the only arg - template = field.metadata["output_file_template"] + template = field.path_template if callable(template): - template = template(inputs) + template = template(definition) # as default, we assume that keep_extension is True if isinstance(template, (tuple, list)): - formatted = [ - _string_template_formatting(field, t, inputs, inputs_dict_st) - for t in template - ] + formatted = [_single_template_formatting(field, t, values) for t in template] + if any([val is None for val in formatted]): + return None else: assert isinstance(template, str) - formatted = _string_template_formatting(field, template, inputs, inputs_dict_st) + formatted = _single_template_formatting(field, template, values) return formatted -def _string_template_formatting(field, template, inputs, inputs_dict_st): - from .specs import MultiInputObj, MultiOutputFile +def _single_template_formatting( + field: "shell.outarg", + template: str, + values: dict[str, ty.Any], +) -> Path | None: + from pydra.utils.typing import MultiInputObj, MultiOutputFile - keep_extension = field.metadata.get("keep_extension", True) inp_fields = re.findall(r"{\w+}", template) inp_fields_fl = re.findall(r"{\w+:[0-9.]+f}", template) inp_fields += [re.sub(":[0-9.]+f", "", el) for el in inp_fields_fl] + + # FIXME: This would be a better solution, and would allow you to explicitly specify + # whether you want to use the extension of the input file or not, by referencing + # the "ext" attribute of the input file. However, this would require a change in the + # way the element formatting is done + # + # inp_fields = set(re.findall(r"{(\w+)(?:\.\w+)?(?::[0-9.]+f)?}", template)) + if len(inp_fields) == 0: - return template + return Path(template) val_dict = {} file_template = None for fld in inp_fields: fld_name = fld[1:-1] # extracting the name form {field_name} - if fld_name not in inputs_dict_st: + if fld_name not in values: raise AttributeError(f"{fld_name} is not provided in the input") - fld_value = inputs_dict_st[fld_name] - if fld_value is attr.NOTHING: + fld_value = values[fld_name] + if fld_value is None: # if value is NOTHING, nothing should be added to the command - return attr.NOTHING - else: - # checking for fields that can be treated as a file: - # have type File, or value that is path like (including str with extensions) - if isinstance(fld_value, os.PathLike) or ( - isinstance(fld_value, str) and "." 
in fld_value - ): - if file_template: - raise Exception( - f"can't have multiple paths in {field.name} template," - f" but {template} provided" - ) - else: - file_template = (fld_name, fld_value) + return None + # checking for fields that can be treated as a file: + # have type File, or value that is path like (including str with extensions) + if isinstance(fld_value, os.PathLike): + if file_template: + raise Exception( + f"can't have multiple paths in {field.name} template," + f" but {template} provided" + ) else: - val_dict[fld_name] = fld_value + file_template = (fld_name, fld_value) + else: + val_dict[fld_name] = fld_value # if field is MultiOutputFile and some elements from val_dict are lists, # each element of the list should be used separately in the template @@ -280,17 +317,29 @@ def _string_template_formatting(field, template, inputs, inputs_dict_st): formatted_value.append( _element_formatting( - template, val_dict_el, file_template, keep_extension=keep_extension + template, + val_dict_el, + file_template, + keep_extension=field.keep_extension, ) ) else: formatted_value = _element_formatting( - template, val_dict, file_template, keep_extension=keep_extension + template, val_dict, file_template, keep_extension=field.keep_extension ) - return formatted_value + if isinstance(formatted_value, list): + return [Path(val) for val in formatted_value] + elif isinstance(formatted_value, str): + return Path(formatted_value) + return None -def _element_formatting(template, values_template_dict, file_template, keep_extension): +def _element_formatting( + template: str, + values_template_dict: dict[str, ty.Any], + file_template: str, + keep_extension: bool, +): """Formatting a single template for a single element (if a list). Taking into account that a file used in the template (file_template) and the template itself could have file extensions @@ -329,7 +378,7 @@ def _element_formatting(template, values_template_dict, file_template, keep_exte def is_local_file(f): - from ..utils.typing import TypeParser + from pydra.utils.typing import TypeParser return "container_path" not in f.metadata and TypeParser.contains_type( FileSet, f.type diff --git a/pydra/engine/helpers_state.py b/pydra/engine/helpers_state.py index 866d408a46..091aa81d6b 100644 --- a/pydra/engine/helpers_state.py +++ b/pydra/engine/helpers_state.py @@ -1,11 +1,10 @@ """Additional functions used mostly by the State class.""" -import attr import itertools from copy import deepcopy import logging import typing as ty -from .helpers import ensure_list +from .helpers import ensure_list, attrs_values logger = logging.getLogger("pydra") @@ -622,9 +621,7 @@ def map_splits(split_iter, inputs, cont_dim=None): def inputs_types_to_dict(name, inputs): """Convert type.Inputs to dictionary.""" # dj: any better option? - input_names = [ - field for field in attr.asdict(inputs, recurse=False) if field != "_func" - ] + input_names = [field for field in attrs_values(inputs) if field != "_func"] inputs_dict = {} for field in input_names: inputs_dict[f"{name}.{field}"] = getattr(inputs, field) @@ -632,7 +629,7 @@ def inputs_types_to_dict(name, inputs): def unwrap_splitter( - splitter: ty.Union[str, ty.List[str], ty.Tuple[str, ...]] + splitter: ty.Union[str, ty.List[str], ty.Tuple[str, ...]], ) -> ty.Iterable[str]: """Unwraps a splitter into a flat list of fields that are split over, i.e. [("a", "b"), "c"] -> ["a", "b", "c"] @@ -640,7 +637,7 @@ def unwrap_splitter( Parameters ---------- splitter: str or list[str] or tuple[str, ...] 
- the splitter spec to unwrap + the splitter definition to unwrap Returns ------- diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py new file mode 100644 index 0000000000..06c909fd9f --- /dev/null +++ b/pydra/engine/lazy.py @@ -0,0 +1,211 @@ +import typing as ty +import abc +import attrs +from pydra.utils.typing import StateArray +from pydra.utils.hash import hash_single +from . import node + +if ty.TYPE_CHECKING: + from .submitter import DiGraph, NodeExecution + from .core import Task, Workflow + from .specs import TaskDef + + +T = ty.TypeVar("T") +DefType = ty.TypeVar("DefType", bound="TaskDef") + +TypeOrAny = ty.Union[type, ty.Any] + + +@attrs.define(kw_only=True) +class LazyField(ty.Generic[T], metaclass=abc.ABCMeta): + """Lazy fields implement promises.""" + + _field: str + _type: TypeOrAny + _cast_from: ty.Optional[ty.Type[ty.Any]] = None + _type_checked: bool = False + + def __bytes_repr__(self, cache): + yield type(self).__name__.encode() + b"(" + yield from bytes(hash_single(self.source, cache)) + yield b"field=" + self._field.encode() + yield b"type=" + bytes(hash_single(self._type, cache)) + yield b"cast_from=" + bytes(hash_single(self._cast_from, cache)) + yield b")" + + def _apply_cast(self, value): + """\"Casts\" the value from the retrieved type if a cast has been applied to + the lazy-field""" + from pydra.utils.typing import TypeParser + + if self._cast_from: + assert TypeParser.matches(value, self._cast_from) + value = self._type(value) + return value + + def _get_value( + self, + workflow: "Workflow", + graph: "DiGraph[NodeExecution]", + state_index: int | None = None, + ) -> ty.Any: + """Return the value of a lazy field. + + Parameters + ---------- + workflow: Workflow + the workflow object + graph: DiGraph[NodeExecution] + the graph representing the execution state of the workflow + state_index : int, optional + the state index of the field to access + + Returns + ------- + value : Any + the resolved value of the lazy-field + """ + raise NotImplementedError("LazyField is an abstract class") + + +@attrs.define(kw_only=True) +class LazyInField(LazyField[T]): + + _workflow: "Workflow" = attrs.field() + + _attr_type = "input" + + def __eq__(self, other): + return ( + isinstance(other, LazyInField) + and self._field == other._field + and self._type == other._type + ) + + def __repr__(self): + return f"{type(self).__name__}(field={self._field!r}, type={self._type})" + + @property + def _source(self): + return self._workflow + + def _get_value( + self, + workflow: "Workflow", + graph: "DiGraph[NodeExecution]", + state_index: int | None = None, + ) -> ty.Any: + """Return the value of a lazy field. + + Parameters + ---------- + workflow: Workflow + the workflow object + graph: DiGraph[NodeExecution] + the graph representing the execution state of the workflow + state_index : int, optional + the state index of the field to access + + Returns + ------- + value : Any + the resolved value of the lazy-field + """ + value = workflow.inputs[self._field] + value = self._apply_cast(value) + return value + + +@attrs.define(kw_only=True) +class LazyOutField(LazyField[T]): + + _node: node.Node + _attr_type = "output" + + def __repr__(self): + return ( + f"{type(self).__name__}(node={self._node.name!r}, " + f"field={self._field!r}, type={self._type})" + ) + + def _get_value( + self, + workflow: "Workflow", + graph: "DiGraph[NodeExecution]", + state_index: int | None = None, + ) -> ty.Any: + """Return the value of a lazy field. 
+ + Parameters + ---------- + workflow: Workflow + the workflow object + graph: DiGraph[NodeExecution] + the graph representing the execution state of the workflow + state_index : int, optional + the state index of the field to access + + Returns + ------- + value : Any + the resolved value of the lazy-field + """ + state = self._node.state + jobs = graph.node(self._node.name).get_jobs(state_index) + + def retrieve_from_job(job: "Task[DefType]") -> ty.Any: + if job.errored: + raise ValueError( + f"Cannot retrieve value for {self._field} from {self._node.name} as " + "the node errored" + ) + res = job.result() + if res is None: + raise RuntimeError( + f"Could not find results of '{job.name}' node in a sub-directory " + f"named '{{{job.checksum}}}' in any of the cache locations.\n" + + "\n".join(str(p) for p in set(job.cache_locations)) + + f"\n\nThis is likely due to hash changes in '{job.name}' node inputs. " + f"Current values and hashes: {job.inputs}, " + f"{job.definition._hash}\n\n" + "Set loglevel to 'debug' in order to track hash changes " + "throughout the execution of the workflow.\n\n " + "These issues may have been caused by `bytes_repr()` methods " + "that don't return stable hash values for specific object " + "types across multiple processes (see bytes_repr() " + '"singledispatch "function in pydra/utils/hash.py).' + "You may need to write specific `bytes_repr()` " + "implementations (see `pydra.utils.hash.register_serializer`) or a " + "`__bytes_repr__()` dunder methods to handle one or more types in " + "your interface inputs." + ) + val = res.get_output_field(self._field) + val = self._apply_cast(val) + return val + + if not isinstance(jobs, StateArray): # single job + return retrieve_from_job(jobs) + elif not state or not state.depth(before_combine=True): + assert len(jobs) == 1 + return retrieve_from_job(jobs[0]) + # elif state.combiner and state.keys_final: + # # We initialise it here rather than using a defaultdict to ensure the order + # # of the keys matches how it is defined in the state so we can return the + # # values in the correct order + # sorted_values = {frozenset(i.items()): [] for i in state.states_ind_final} + # # Iterate through the jobs and append the values to the correct final state + # # key + # for job in jobs: + # state_key = frozenset( + # (key, state.states_ind[job.state_index][key]) + # for key in state.keys_final + # ) + # sorted_values[state_key].append(retrieve_from_job(job)) + # return StateArray(sorted_values.values()) + # else: + return [retrieve_from_job(j) for j in jobs] + + @property + def _source(self): + return self._node diff --git a/pydra/engine/node.py b/pydra/engine/node.py new file mode 100644 index 0000000000..906d29847e --- /dev/null +++ b/pydra/engine/node.py @@ -0,0 +1,258 @@ +import typing as ty +from copy import deepcopy +from enum import Enum +import attrs +from . 
import lazy +from pydra.engine.helpers import ( + attrs_values, + is_lazy, +) +from pydra.engine import helpers_state as hlpst +from pydra.engine.state import State + +if ty.TYPE_CHECKING: + from .core import Workflow + from .environments import Environment + from pydra.engine.specs import TaskDef, TaskOutputs, TaskHooks + + +OutputType = ty.TypeVar("OutputType", bound="TaskOutputs") +Splitter = ty.Union[str, ty.Tuple[str, ...]] + +_not_set = Enum("_not_set", "NOT_SET") + +NOT_SET = _not_set.NOT_SET + + +@attrs.define +class Node(ty.Generic[OutputType]): + """A node in a workflow + + Parameters + ---------- + name : str + The name of the node + inputs : TaskDef + The definition of the node + """ + + name: str + _definition: "TaskDef[OutputType]" + _environment: "Environment | None" = None + _hooks: "TaskHooks | None" = None + _workflow: "Workflow" = attrs.field(default=None, eq=False, hash=False, repr=False) + _lzout: OutputType | None = attrs.field( + init=False, default=None, eq=False, hash=False, repr=False + ) + _state: State | None = attrs.field(init=False, default=NOT_SET) + + def __attrs_post_init__(self): + self._set_state() + + class Inputs: + """A class to wrap the inputs of a node and control access to them so lazy fields + that will change the downstream state (i.e. with new splits) aren't set after + the node has been split, combined or its outputs accessed. + """ + + _node: "Node" + + def __init__(self, node: "Node") -> None: + super().__setattr__("_node", node) + + def __getattr__(self, name: str) -> ty.Any: + return getattr(self._node._definition, name) + + def __getstate__(self) -> ty.Dict[str, ty.Any]: + return {"_node": self._node} + + def __setstate__(self, state: ty.Dict[str, ty.Any]) -> None: + super().__setattr__("_node", state["_node"]) + + def __setattr__(self, name: str, value: ty.Any) -> None: + setattr(self._node._definition, name, value) + if is_lazy(value): + upstream_states = self._node._get_upstream_states() + if ( + not self._node._state + or self._node._state.other_states != upstream_states + ): + self._node._check_if_outputs_have_been_used( + f"cannot set {name!r} input to {value} because it changes the " + f"state" + ) + self._set_state() + + @property + def inputs(self) -> Inputs: + return self.Inputs(self) + + @property + def input_names(self) -> list[str]: + return list(attrs_values(self._definition).keys()) + + @property + def state(self): + """Initialise the state of the node just after it has been created (i.e. 
before + it has been split or combined) based on the upstream connections + """ + return self._state + + @property + def input_values(self) -> tuple[tuple[str, ty.Any]]: + return tuple(attrs_values(self._definition).items()) + + @property + def state_values(self) -> dict[str, ty.Any]: + """Get the values of the task definition, scoped by the name of the node to be + used in the state + + Returns + ------- + dict[str, Any] + The values of the task definition + """ + return { + f"{self.name}.{n}": v for n, v in attrs_values(self._definition).items() + } + + @property + def lzout(self) -> OutputType: + from pydra.engine.helpers import list_fields + + """The output definition of the node populated with lazy fields""" + if self._lzout is not None: + return self._lzout + lazy_fields = {} + for field in list_fields(self.inputs.Outputs): + lazy_fields[field.name] = lazy.LazyOutField( + node=self, + field=field.name, + type=field.type, + ) + outputs = self.inputs.Outputs(**lazy_fields) + + outpt: lazy.LazyOutField + for outpt in attrs_values(outputs).values(): + # Assign the current node to the lazy fields so they can access the state + outpt._node = self + # If the node has a non-empty state, wrap the type of the lazy field in + # a combination of an optional list and a number of nested StateArrays + # types based on the number of states the node is split over and whether + # it has a combiner + if self._state: + outpt._type = self._state.nest_output_type(outpt._type) + # Flag the output lazy fields as being not typed checked (i.e. assigned to + # another node's inputs) yet. This is used to prevent the user from changing + # the type of the output after it has been accessed by connecting it to an + # output of an upstream node with additional state variables. 
+ outpt._type_checked = False + self._lzout = outputs + return outputs + + @property + def cont_dim(self): + # adding inner_cont_dim to the general container_dimension provided by the users + cont_dim_all = deepcopy(self._cont_dim) + for k, v in self.state._inner_cont_dim.items(): + cont_dim_all[k] = cont_dim_all.get(k, 1) + v + return cont_dim_all + + @cont_dim.setter + def cont_dim(self, cont_dim): + if cont_dim is None: + self._cont_dim = {} + else: + self._cont_dim = cont_dim + + @property + def splitter(self): + if not self._state: + return () + return self._state.splitter + + @property + def combiner(self): + if not self._state: + return () + return self._state.combiner + + def _check_if_outputs_have_been_used(self, msg): + used = [] + if self._lzout: + for outpt_name, outpt_val in attrs.asdict( + self._lzout, recurse=False + ).items(): + if outpt_val.type_checked: + used.append(outpt_name) + if used: + raise RuntimeError( + f"Outputs {used} of {self} have already been accessed and therefore " + + msg + ) + + def _set_state(self) -> None: + # Add node name to state's splitter, combiner and cont_dim loaded from the def + splitter = deepcopy( + self._definition._splitter + ) # these can be modified in state + combiner = deepcopy( + self._definition._combiner + ) # these can be modified in state + cont_dim = {} + if splitter: + splitter = hlpst.add_name_splitter(splitter, self.name) + if combiner: + combiner = hlpst.add_name_combiner(combiner, self.name) + if self._definition._cont_dim: + for key, val in self._definition._cont_dim.items(): + cont_dim[f"{self.name}.{key}"] = val + other_states = self._get_upstream_states() + if splitter or combiner or other_states: + self._state = State( + self.name, + splitter=splitter, + other_states=other_states, + combiner=combiner, + cont_dim=cont_dim, + ) + if combiner: + if not_split := [ + c + for c in combiner + if not any(c in s for s in self.state.splitter_rpn) + ]: + raise ValueError( + f"Combiner fields {not_split} for Node {self.name!r} are not in the " + f"splitter fields {self.state.splitter_rpn}" + ) + else: + self._state = None + + def _get_upstream_states(self) -> dict[str, tuple["State", list[str]]]: + """Get the states of the upstream nodes that are connected to this node""" + upstream_states = {} + for inpt_name, val in self.input_values: + if ( + isinstance(val, lazy.LazyOutField) + and val._node.state + and val._node.state.depth() + ): + node: Node = val._node + # variables that are part of inner splitters should be treated as a containers + if node.state and f"{node.name}.{inpt_name}" in node.state.splitter: + node.state._inner_cont_dim[f"{node.name}.{inpt_name}"] = 1 + # adding task_name: (task.state, [a field from the connection] + if node.name not in upstream_states: + upstream_states[node.name] = (node.state, [inpt_name]) + else: + # if the task already exist in other_state, + # additional field name should be added to the list of fields + upstream_states[node.name][1].append(inpt_name) + return upstream_states + + # else: + # # todo it never gets here + # breakpoint() + # inputs_dict = {inp: getattr(self.inputs, inp) for inp in self.input_names} + # return None, inputs_dict diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index a2e3651779..5752cab39c 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -1,88 +1,469 @@ -"""Task I/O specifications.""" +"""Task I/O definitions.""" from pathlib import Path -import typing as ty -import inspect import re -import os from copy import copy +import os +import 
inspect +import itertools +import platform +import shlex +from collections import Counter +import typing as ty from glob import glob -import attr -from fileformats.core import FileSet -from fileformats.generic import ( - File, - Directory, +from copy import deepcopy +from typing import Self +import attrs +from attrs.converters import default_if_none +import cloudpickle as cp +from fileformats.generic import FileSet, File +from pydra.utils.messenger import AuditFlag, Messenger +from pydra.utils.typing import is_optional, optional_type +from .helpers import ( + attrs_fields, + attrs_values, + is_lazy, + list_fields, + position_sort, + ensure_list, + parse_format_string, + fields_in_formatter, + state_array_support, ) -import pydra -from .helpers_file import template_update_single -from ..utils.hash import hash_function, Cache +from .helpers_file import template_update, template_update_single +from . import helpers_state as hlpst +from . import lazy +from pydra.utils.hash import hash_function, Cache +from pydra.utils.typing import ( + StateArray, + is_multi_input, + is_fileset_or_union, + MultiOutputObj, + MultiOutputFile, +) +from pydra.design.base import Field, Arg, Out, RequirementSet, NO_DEFAULT +from pydra.design import shell + +if ty.TYPE_CHECKING: + from pydra.engine.core import Task + from pydra.engine.graph import DiGraph + from pydra.engine.submitter import NodeExecution + from pydra.engine.core import Workflow + from pydra.engine.environments import Environment + from pydra.engine.workers import Worker -# from ..utils.misc import add_exc_note +DefType = ty.TypeVar("DefType", bound="TaskDef") -T = ty.TypeVar("T") +def is_set(value: ty.Any) -> bool: + """Check if a value has been set.""" + return value not in (attrs.NOTHING, NO_DEFAULT) -def attr_fields(spec, exclude_names=()): - return [field for field in spec.__attrs_attrs__ if field.name not in exclude_names] +@attrs.define(kw_only=True, auto_attribs=False, eq=False) +class TaskOutputs: + """Base class for all output definitions""" -# These are special types that are checked for in the construction of input/output specs -# and special converters inserted into the attrs fields. 
+ RESERVED_FIELD_NAMES = ("inputs",) + @property + def inputs(self): + """The inputs object associated with a lazy-outputs object""" + return self._get_node().inputs -class MultiInputObj(list, ty.Generic[T]): - pass + @classmethod + def _from_defaults(cls) -> Self: + """Create an output object from the default values of the fields""" + return cls( + **{ + f.name: ( + f.default.factory() + if isinstance(f.default, attrs.Factory) + else f.default + ) + for f in attrs_fields(cls) + } + ) + + def _get_node(self): + try: + return self._node + except AttributeError: + raise AttributeError( + f"{self} outputs object is not a lazy output of a workflow node" + ) from None + + def __iter__(self) -> ty.Generator[str, None, None]: + """The names of the fields in the output object""" + return iter(sorted(f.name for f in attrs_fields(self))) + + def __getitem__(self, name_or_index: str | int) -> ty.Any: + """Return the value for the given attribute + + Parameters + ---------- + name : str + the name of the attribute to return + Returns + ------- + Any + the value of the attribute + """ + if isinstance(name_or_index, int): + return list(self)[name_or_index] + try: + return getattr(self, name_or_index) + except AttributeError: + raise KeyError( + f"{self} doesn't have an attribute {name_or_index}" + ) from None + + def __eq__(self, other: ty.Any) -> bool: + """Check if two task definitions are equal""" + values = attrs.asdict(self) + fields = list_fields(self) + try: + other_values = attrs.asdict(other) + except AttributeError: + return False + try: + other_fields = list_fields(other) + except AttributeError: + return False + if fields != other_fields: + return False + for field in list_fields(self): + if field.hash_eq: + values[field.name] = hash_function(values[field.name]) + other_values[field.name] = hash_function(other_values[field.name]) + return values == other_values -MultiInputFile = MultiInputObj[File] +OutputsType = ty.TypeVar("OutputType", bound=TaskOutputs) -# Since we can't create a NewType from a type union, we add a dummy type to the union -# so we can detect the MultiOutput in the input/output spec creation -class MultiOutputType: - pass +def donothing(*args: ty.Any, **kwargs: ty.Any) -> None: + return None -MultiOutputObj = ty.Union[list, object, MultiOutputType] -MultiOutputFile = ty.Union[File, ty.List[File], MultiOutputType] -OUTPUT_TEMPLATE_TYPES = ( - Path, - ty.List[Path], - ty.Union[Path, bool], - ty.Union[ty.List[Path], bool], - ty.List[ty.List[Path]], -) +@attrs.define(kw_only=True) +class TaskHooks: + """Callable task hooks.""" + pre_run_task: ty.Callable = attrs.field( + default=donothing, converter=default_if_none(donothing) + ) + post_run_task: ty.Callable = attrs.field( + default=donothing, converter=default_if_none(donothing) + ) + pre_run: ty.Callable = attrs.field( + default=donothing, converter=default_if_none(donothing) + ) + post_run: ty.Callable = attrs.field( + default=donothing, converter=default_if_none(donothing) + ) -@attr.s(auto_attribs=True, kw_only=True) -class SpecInfo: - """Base data structure for metadata of specifications.""" + def reset(self): + for val in ["pre_run_task", "post_run_task", "pre_run", "post_run"]: + setattr(self, val, donothing) - name: str - """A name for the specification.""" - fields: ty.List[ty.Tuple] = attr.ib(factory=list) - """List of names of fields (can be inputs or outputs).""" - bases: ty.Sequence[ty.Type["BaseSpec"]] = attr.ib(factory=tuple) - """Keeps track of specification inheritance. 
- Should be a tuple containing at least one BaseSpec """ +@attrs.define(kw_only=True, auto_attribs=False, eq=False) +class TaskDef(ty.Generic[OutputsType]): + """Base class for all task definitions""" + + # Class attributes + _xor: frozenset[frozenset[str | None]] = ( + frozenset() + ) # overwritten in derived classes + + # The following fields are used to store split/combine state information + _splitter = attrs.field(default=None, init=False, repr=False) + _combiner = attrs.field(default=None, init=False, repr=False) + _cont_dim = attrs.field(default=None, init=False, repr=False) + _hashes = attrs.field(default=None, init=False, eq=False, repr=False) + + RESERVED_FIELD_NAMES = ("split", "combine") + + def __call__( + self, + /, + cache_dir: os.PathLike | None = None, + worker: "str | ty.Type[Worker] | Worker" = "debug", + environment: "Environment | None" = None, + rerun: bool = False, + cache_locations: ty.Iterable[os.PathLike] | None = None, + audit_flags: AuditFlag = AuditFlag.NONE, + messengers: ty.Iterable[Messenger] | None = None, + messenger_args: dict[str, ty.Any] | None = None, + hooks: TaskHooks | None = None, + **kwargs: ty.Any, + ) -> OutputsType: + """Create a task from this definition and execute it to produce a result. -@attr.s(auto_attribs=True, kw_only=True) -class BaseSpec: - """The base dataclass specs for all inputs and outputs.""" + Parameters + ---------- + cache_dir : os.PathLike, optional + Cache directory where the working directory/results for the task will be + stored, by default None + worker : str or Worker, optional + The worker to use, by default "cf" + environment: Environment, optional + The execution environment to use, by default None + rerun : bool, optional + Whether to force the re-computation of the task results even if existing + results are found, by default False + cache_locations : list[os.PathLike], optional + Alternate cache locations to check for pre-computed results, by default None + audit_flags : AuditFlag, optional + Auditing configuration, by default AuditFlag.NONE + messengers : list, optional + Messengers, by default None + messenger_args : dict, optional + Messenger arguments, by default None + **kwargs : dict + Keyword arguments to pass on to the worker initialisation - def collect_additional_outputs(self, inputs, output_dir, outputs): - """Get additional outputs.""" - return {} + Returns + ------- + OutputsType or list[OutputsType] + The output interface of the task, or in the case of split tasks, a list of + output interfaces + """ + from pydra.engine.submitter import ( # noqa: F811 + Submitter, + WORKER_KWARG_FAIL_NOTE, + ) + + try: + with Submitter( + audit_flags=audit_flags, + cache_dir=cache_dir, + cache_locations=cache_locations, + messenger_args=messenger_args, + messengers=messengers, + environment=environment, + worker=worker, + **kwargs, + ) as sub: + result = sub( + self, + hooks=hooks, + rerun=rerun, + ) + except TypeError as e: + # Catch any inadvertent passing of task definition parameters to the + # execution call + if hasattr(e, "__notes__") and WORKER_KWARG_FAIL_NOTE in e.__notes__: + if match := re.match( + r".*got an unexpected keyword argument '(\w+)'", str(e) + ): + if match.group(1) in self: + e.add_note( + f"Note that the unrecognised argument, {match.group(1)!r}, is " + f"an input of the task definition {self!r} that has already been " + f"parameterised (it is being called to execute it)" + ) + raise + if result.errored: + if isinstance(self, WorkflowDef) or self._splitter: + raise RuntimeError(f"Workflow 
{self} failed with errors") + else: + errors = result.errors + raise RuntimeError( + f"Task {self} failed @ {errors['time of crash']} with the following errors:\n" + + "\n".join(errors["error message"]) + ) + return result.outputs + + def split( + self, + splitter: ty.Union[str, ty.List[str], ty.Tuple[str, ...], None] = None, + /, + overwrite: bool = False, + cont_dim: ty.Optional[dict] = None, + **inputs, + ) -> Self: + """ + Run this task parametrically over lists of split inputs. + + Parameters + ---------- + splitter : str or list[str] or tuple[str] or None + the fields which to split over. If splitting over multiple fields, lists of + fields are interpreted as outer-products and tuples inner-products. If None, + then the fields to split are taken from the keyword-arg names. + overwrite : bool, optional + whether to overwrite an existing split on the node, by default False + cont_dim : dict, optional + Container dimensions for specific inputs, used in the splitter. + If input name is not in cont_dim, it is assumed that the input values has + a container dimension of 1, so only the most outer dim will be used for splitting. + **inputs + fields to split over, will be automatically wrapped in a StateArray object + and passed to the node inputs + + Returns + ------- + self : TaskBase + a reference to the task + """ + if self._splitter and not overwrite: + raise ValueError( + f"Cannot overwrite existing splitter {self._splitter} on {self}, " + "set 'overwrite=True' to do so" + ) + if splitter: + unwraped_split = list(hlpst.unwrap_splitter(splitter)) + if duplicated := [f for f, c in Counter(unwraped_split).items() if c > 1]: + raise ValueError(f"Splitter fields {duplicated} are duplicated") + split_names = set( + s for s in unwraped_split if not s.startswith("_") and "." not in s + ) + input_names = set(inputs) + if missing_inputs := list(split_names - input_names): + raise ValueError( + f"Splitter fields {missing_inputs} need to be provided as a keyword " + f"arguments to the split method (provided {list(inputs)})" + ) + if unrecognised_inputs := list(input_names - split_names): + raise ValueError( + f"Provided inputs {unrecognised_inputs} are not present in the " + f"splitter {splitter}" + ) + else: + # If no splitter is provided, use the names of the inputs as combinatorial splitter + split_names = splitter = list(inputs) + for field_name in cont_dim or []: + if field_name not in split_names: + raise ValueError( + f"Container dimension for {field_name} is provided but the field " + f"is not present in the inputs" + ) + split_inputs = {} + for name, value in inputs.items(): + if isinstance(value, lazy.LazyField): + split_val = value.split(splitter) + elif isinstance(value, ty.Iterable) and not isinstance( + value, (ty.Mapping, str) + ): + split_val = StateArray(value) + else: + raise TypeError( + f"Could not split {value!r} as it is not a sequence type" + ) + split_inputs[name] = split_val + split_def = attrs.evolve(self, **split_inputs) + split_def._splitter = splitter + split_def._cont_dim = cont_dim + return split_def + + def combine( + self, + combiner: ty.Union[ty.List[str], str], + overwrite: bool = False, + ) -> Self: + """ + Combine inputs parameterized by one or more previous tasks. + + Parameters + ---------- + combiner : list[str] or str + the field or list of inputs to be combined (i.e. 
not left split) after the + task has been run + overwrite : bool + whether to overwrite an existing combiner on the node + **kwargs : dict[str, Any] + values for the task that will be "combined" before they are provided to the + node + + Returns + ------- + self : Self + a reference to the outputs object + """ + if self._combiner and not overwrite: + raise ValueError( + f"Attempting to overwrite existing combiner {self._combiner} on {self}, " + "set 'overwrite=True' to do so" + ) + if isinstance(combiner, str): + combiner = [combiner] + local_names = set(c for c in combiner if "." not in c and not c.startswith("_")) + if unrecognised := local_names - set(self): + raise ValueError( + f"Combiner fields {unrecognised} are not present in the task definition" + ) + combined_def = copy(self) + combined_def._combiner = combiner + return combined_def + + def __iter__(self) -> ty.Generator[str, None, None]: + """Iterate through all the names in the definition""" + return ( + f.name + for f in list_fields(self) + if not (f.name.startswith("_") or f.name in self.RESERVED_FIELD_NAMES) + ) + + def __eq__(self, other: ty.Any) -> bool: + """Check if two task definitions are equal""" + values = attrs.asdict(self, recurse=False) + try: + other_values = attrs.asdict(other, recurse=False) + except AttributeError: + return False + if set(values) != set(other_values): + return False # Return if attribute keys don't match + for field in list_fields(self): + if field.hash_eq: + values[field.name] = hash_function(values[field.name]) + other_values[field.name] = hash_function(other_values[field.name]) + if values != other_values: + return False + hash_cache = Cache() + if hash_function(type(self), cache=hash_cache) != hash_function( + type(other), cache=hash_cache + ): + return False + try: + other_outputs = other.Outputs + except AttributeError: + return False + return hash_function(self.Outputs, cache=hash_cache) == hash_function( + other_outputs, cache=hash_cache + ) + + def __getitem__(self, name: str) -> ty.Any: + """Return the value for the given attribute, resolving any templates + + Parameters + ---------- + name : str + the name of the attribute to return + + Returns + ------- + Any + the value of the attribute + """ + try: + return getattr(self, name) + except AttributeError: + raise KeyError(f"{self} doesn't have an attribute {name}") from None @property - def hash(self): + def _hash(self): hsh, self._hashes = self._compute_hashes() return hsh - def hash_changes(self): + @property + def _checksum(self): + return f"{self._task_type}-{self._hash}" + + def _hash_changes(self): """Detects any changes in the hashed values between the current inputs and the previously calculated values""" _, new_hashes = self._compute_hashes() @@ -91,108 +472,139 @@ def hash_changes(self): def _compute_hashes(self) -> ty.Tuple[bytes, ty.Dict[str, bytes]]: """Compute a basic hash for any given set of fields.""" inp_dict = {} - for field in attr_fields( - self, exclude_names=("_graph_checksums", "bindings", "files_hash") - ): - if field.metadata.get("output_file_template"): - continue + for field in list_fields(self): + if isinstance(field, Out): + continue # Skip output fields # removing values that are not set from hash calculation - if getattr(self, field.name) is attr.NOTHING: + if getattr(self, field.name) is attrs.NOTHING: continue - if "container_path" in field.metadata: + if getattr(field, "container_path", False): continue inp_dict[field.name] = getattr(self, field.name) + # Include the outputs class, just in case any 
names or types have changed + inp_dict["Outputs"] = self.Outputs hash_cache = Cache() field_hashes = { k: hash_function(v, cache=hash_cache) for k, v in inp_dict.items() } - if hasattr(self, "_graph_checksums"): - field_hashes["_graph_checksums"] = self._graph_checksums return hash_function(sorted(field_hashes.items())), field_hashes - def retrieve_values(self, wf, state_index: ty.Optional[int] = None): - """Get values contained by this spec.""" - retrieved_values = {} - for field in attr_fields(self): - value = getattr(self, field.name) - if isinstance(value, LazyField): - retrieved_values[field.name] = value.get_value( - wf, state_index=state_index - ) - for field, val in retrieved_values.items(): - setattr(self, field, val) - - def check_fields_input_spec(self): - """ - Check fields from input spec based on the medatada. - - e.g., if xor, requires are fulfilled, if value provided when mandatory. + def _rule_violations(self) -> list[str]: + """Check rules and returns a list of errors.""" - """ - fields = attr_fields(self) + field: Arg + errors = [] + for field in list_fields(self): + value = self[field.name] - for field in fields: - field_is_mandatory = bool(field.metadata.get("mandatory")) - field_is_unset = getattr(self, field.name) is attr.NOTHING - - if field_is_unset and not field_is_mandatory: + if is_lazy(value): continue - # Collect alternative fields associated with this field. - alternative_fields = { - name: getattr(self, name) is not attr.NOTHING - for name in field.metadata.get("xor", []) - if name != field.name - } - alternatives_are_set = any(alternative_fields.values()) + if ( + value is attrs.NOTHING + and not getattr(field, "path_template", False) + and not field.readonly + ): + errors.append(f"Mandatory field {field.name!r} is not set") - # Raise error if no field in mandatory alternative group is set. - if field_is_unset: - if alternatives_are_set: - continue - message = f"{field.name} is mandatory and unset." - if alternative_fields: - raise AttributeError( - message[:-1] - + f", but no alternative provided by {list(alternative_fields)}." + # Raise error if any required field is unset. + if ( + not ( + value is None + or value is False + or ( + is_optional(field.type) + and is_fileset_or_union(field.type) + and value is True + ) + ) + and field.requires + and not any(rs.satisfied(self) for rs in field.requires) + ): + if len(field.requires) > 1: + qualification = ( + " at least one of the following requirements to be satisfied: " ) else: - raise AttributeError(message) - - # Raise error if multiple alternatives are set. - elif alternatives_are_set: - set_alternative_fields = [ - name for name, is_set in alternative_fields.items() if is_set - ] - raise AttributeError( - f"{field.name} is mutually exclusive with {set_alternative_fields}" + qualification = "" + errors.append( + f"{field.name!r} requires{qualification} {[str(r) for r in field.requires]}" + ) + # Collect alternative fields associated with this field. + for xor_set in self._xor: + mutually_exclusive = {name: self[name] for name in xor_set if name} + are_set = [f"{n}={v!r}" for n, v in mutually_exclusive.items() if v] + if len(are_set) > 1: + errors.append( + f"Mutually exclusive fields ({', '.join(sorted(are_set))}) are set " + "together" + ) + elif not are_set and None not in xor_set: + errors.append( + "At least one of the mutually exclusive fields should be set: " + + ", ".join(f"{n}={v!r}" for n, v in mutually_exclusive.items()) ) + return errors - # Collect required fields associated with this field. 
- required_fields = { - name: getattr(self, name) is not attr.NOTHING - for name in field.metadata.get("requires", []) - if name != field.name - } + def _check_rules(self): + """Check if all rules are satisfied.""" - # Raise error if any required field is unset. - if not all(required_fields.values()): - unset_required_fields = [ - name for name, is_set in required_fields.items() if not is_set - ] - raise AttributeError(f"{field.name} requires {unset_required_fields}") + attrs.validate(self) - def check_metadata(self): - """Check contained metadata.""" + if errors := self._rule_violations(): + raise ValueError( + f"Found the following errors in task {self} definition:\n" + + "\n".join(errors) + ) - def template_update(self): - """Update template.""" + @classmethod + def _check_arg_refs( + cls, + inputs: list[Arg], + outputs: list[Out], + xor: frozenset[frozenset[str | None]], + ) -> None: + """ + Checks if all fields referenced in requirements and xor are present in the inputs + are valid field names + """ + field: Field + input_names = set(inputs) + for field in itertools.chain(inputs.values(), outputs.values()): + if unrecognised := ( + set([r.name for s in field.requires for r in s]) - input_names + ): + raise ValueError( + "'Unrecognised' field names in referenced in the requirements " + f"of {field} " + str(list(unrecognised)) + ) - def copyfile_input(self, output_dir): - """Copy the file pointed by a :class:`File` input.""" + for xor_set in xor: + if unrecognised := xor_set - (input_names | {None}): + raise ValueError( + f"'Unrecognised' field names in referenced in the xor {xor_set} " + + str(list(unrecognised)) + ) + for field_name in xor_set: + if field_name is None: # i.e. none of the fields being set is valid + continue + type_ = inputs[field_name].type + if type_ not in (ty.Any, bool) and not is_optional(type_): + raise ValueError( + f"Fields included in a 'xor' ({field_name!r}) must be of boolean " + f"or optional types, not type {type_}" + ) + + def _check_resolved(self): + """Checks that all the fields in the definition have been resolved""" + if lazy_values := [n for n, v in attrs_values(self).items() if is_lazy(v)]: + raise ValueError( + f"Cannot execute {self} because the following fields " + f"still have lazy values {lazy_values}" + ) -@attr.s(auto_attribs=True, kw_only=True) +@attrs.define(kw_only=True) class Runtime: """Represent run time metadata.""" @@ -204,31 +616,31 @@ class Runtime: """Peak in cpu consumption.""" -@attr.s(auto_attribs=True, kw_only=True) -class Result: +@attrs.define(kw_only=True) +class Result(ty.Generic[OutputsType]): """Metadata regarding the outputs of processing.""" - output: ty.Optional[ty.Any] = None - runtime: ty.Optional[Runtime] = None + output_dir: Path + outputs: OutputsType | None = None + runtime: Runtime | None = None errored: bool = False + definition: TaskDef[OutputsType] | None = None + + CLOUD_PICKLE_ATTRS = ("outputs", "definition") def __getstate__(self): - state = self.__dict__.copy() - if state["output"] is not None: - fields = tuple((el.name, el.type) for el in attr_fields(state["output"])) - state["output_spec"] = (state["output"].__class__.__name__, fields) - state["output"] = attr.asdict(state["output"], recurse=False) + state = attrs_values(self) + for attr in self.CLOUD_PICKLE_ATTRS: + if state[attr] is not None: + state[attr] = cp.dumps(state[attr]) return state def __setstate__(self, state): - if "output_spec" in state: - spec = list(state["output_spec"]) - del state["output_spec"] - klass = attr.make_class( - 
spec[0], {k: attr.ib(type=v) for k, v in list(spec[1])} - ) - state["output"] = klass(**state["output"]) - self.__dict__.update(state) + for attr in self.CLOUD_PICKLE_ATTRS: + if state[attr] is not None: + state[attr] = cp.loads(state[attr]) + for name, val in state.items(): + setattr(self, name, val) def get_output_field(self, field_name): """Used in get_values in Workflow @@ -239,12 +651,29 @@ def get_output_field(self, field_name): Name of field in LazyField object """ if field_name == "all_": - return attr.asdict(self.output, recurse=False) + return attrs_values(self.outputs) else: - return getattr(self.output, field_name) + return getattr(self.outputs, field_name) + @property + def errors(self): + if self.errored: + error_file = self.output_dir / "_error.pklz" + if error_file.exists(): + with open(error_file, "rb") as f: + return cp.load(f) + return None + + @property + def task(self): + task_pkl = self.output_dir / "_task.pklz" + if not task_pkl.exists(): + return None + with open(task_pkl, "rb") as f: + return cp.load(f) -@attr.s(auto_attribs=True, kw_only=True) + +@attrs.define(kw_only=True) class RuntimeSpec: """ Specification for a task. @@ -269,809 +698,638 @@ class RuntimeSpec: network: bool = False -@attr.s(auto_attribs=True, kw_only=True) -class FunctionSpec(BaseSpec): - """Specification for a process invoked from a shell.""" +@attrs.define(kw_only=True, auto_attribs=False, eq=False) +class PythonOutputs(TaskOutputs): - def check_metadata(self): - """ - Check the metadata for fields in input_spec and fields. + @classmethod + def _from_task(cls, task: "Task[PythonDef]") -> Self: + """Collect the outputs of a task from a combination of the provided inputs, + the objects in the output directory, and the stdout and stderr of the process. - Also sets the default values when available and needed. + Parameters + ---------- + task : Task[PythonDef] + The task whose outputs are being collected. 
+ outputs_dict : dict[str, ty.Any] + The outputs of the task, as a dictionary + Returns + ------- + outputs : Outputs + The outputs of the task in dataclass """ - supported_keys = { - "allowed_values", - "copyfile", - "help_string", - "mandatory", - # "readonly", #likely not needed - # "output_field_name", #likely not needed - # "output_file_template", #likely not needed - "requires", - "keep_extension", - "xor", - "sep", - } - for fld in attr_fields(self, exclude_names=("_func", "_graph_checksums")): - mdata = fld.metadata - # checking keys from metadata - if set(mdata.keys()) - supported_keys: - raise AttributeError( - f"only these keys are supported {supported_keys}, but " - f"{set(mdata.keys()) - supported_keys} provided" - ) - # checking if the help string is provided (required field) - if "help_string" not in mdata: - raise AttributeError(f"{fld.name} doesn't have help_string field") - # not allowing for default if the field is mandatory - if not fld.default == attr.NOTHING and mdata.get("mandatory"): - raise AttributeError( - f"default value ({fld.default!r}) should not be set when the field " - f"('{fld.name}') in {self}) is mandatory" - ) - # setting default if value not provided and default is available - if getattr(self, fld.name) is None: - if not fld.default == attr.NOTHING: - setattr(self, fld.name, fld.default) - - -@attr.s(auto_attribs=True, kw_only=True) -class ShellSpec(BaseSpec): - """Specification for a process invoked from a shell.""" - - executable: ty.Union[str, ty.List[str]] = attr.ib( - metadata={ - "help_string": "the first part of the command, can be a string, " - "e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']" - } - ) - args: ty.Union[str, ty.List[str], None] = attr.ib( - None, - metadata={ - "help_string": "the last part of the command, can be a string, " - "e.g. 
, or a list" - }, - ) - - def retrieve_values(self, wf, state_index=None): - """Parse output results.""" - temp_values = {} - for field in attr_fields(self): - # retrieving values that do not have templates - if not field.metadata.get("output_file_template"): - value = getattr(self, field.name) - if isinstance(value, LazyField): - temp_values[field.name] = value.get_value( - wf, state_index=state_index - ) - for field, val in temp_values.items(): - value = path_to_string(value) - setattr(self, field, val) + outputs = cls._from_defaults() + for name, val in task.return_values.items(): + setattr(outputs, name, val) + return outputs + + +PythonOutputsType = ty.TypeVar("OutputType", bound=PythonOutputs) + + +@attrs.define(kw_only=True, auto_attribs=False, eq=False) +class PythonDef(TaskDef[PythonOutputsType]): + + _task_type = "python" + + def _run(self, task: "Task[PythonDef]", rerun: bool = True) -> None: + # Prepare the inputs to the function + inputs = attrs_values(self) + del inputs["function"] + # Run the actual function + returned = self.function(**inputs) + # Collect the outputs and save them into the task.return_values dictionary + task.return_values = {f.name: f.default for f in attrs.fields(self.Outputs)} + return_names = list(task.return_values) + if returned is None: + task.return_values = {nm: None for nm in return_names} + elif len(task.return_values) == 1: + # if only one element in the fields, everything should be returned together + task.return_values = {list(task.return_values)[0]: returned} + elif isinstance(returned, tuple) and len(return_names) == len(returned): + task.return_values = dict(zip(return_names, returned)) + elif isinstance(returned, dict): + task.return_values = {key: returned.get(key, None) for key in return_names} + else: + raise RuntimeError( + f"expected {len(return_names)} elements, but {returned} were returned" + ) - def check_metadata(self): - """ - Check the metadata for fields in input_spec and fields. - Also sets the default values when available and needed. 
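The return-value mapping in `PythonDef._run` is worth spelling out: a single declared output receives the whole return value, a tuple is zipped against the output names, and a dict is looked up by name. A standalone illustration of just that mapping (not the actual pydra code path; the example values are invented):

```python
# Illustration of how a Python task's return value is mapped onto its named
# outputs, mirroring the branches in PythonDef._run (invented example values).
def map_return_values(return_names, returned):
    if returned is None:
        return {nm: None for nm in return_names}
    if len(return_names) == 1:
        return {return_names[0]: returned}  # whole value goes to the single output
    if isinstance(returned, tuple) and len(returned) == len(return_names):
        return dict(zip(return_names, returned))
    if isinstance(returned, dict):
        return {nm: returned.get(nm) for nm in return_names}
    raise RuntimeError(
        f"expected {len(return_names)} elements, but {returned} were returned"
    )


print(map_return_values(["out"], (1, 2)))                # {'out': (1, 2)}
print(map_return_values(["mean", "std"], (1.5, 0.2)))    # {'mean': 1.5, 'std': 0.2}
print(map_return_values(["mean", "std"], {"std": 0.2}))  # {'mean': None, 'std': 0.2}
```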
+@attrs.define(kw_only=True, auto_attribs=False, eq=False) +class WorkflowOutputs(TaskOutputs): - """ - from ..utils.typing import TypeParser - - supported_keys = { - "allowed_values", - "argstr", - "container_path", - "copyfile", - "help_string", - "mandatory", - "readonly", - "output_field_name", - "output_file_template", - "position", - "requires", - "keep_extension", - "xor", - "sep", - "formatter", - "_output_type", - } + @classmethod + def _from_task(cls, task: "Task[WorkflowDef]") -> Self: + """Collect the outputs of a workflow task from the outputs of the nodes in the - for fld in attr_fields(self, exclude_names=("_func", "_graph_checksums")): - mdata = fld.metadata - # checking keys from metadata - if set(mdata.keys()) - supported_keys: - raise AttributeError( - f"only these keys are supported {supported_keys}, but " - f"{set(mdata.keys()) - supported_keys} provided for '{fld.name}' " - f"field in {self}" - ) - # checking if the help string is provided (required field) - if "help_string" not in mdata: - raise AttributeError( - f"{fld.name} doesn't have help_string field in {self}" - ) - # assuming that fields with output_file_template shouldn't have default - if mdata.get("output_file_template"): - if not any( - TypeParser.matches_type(fld.type, t) for t in OUTPUT_TEMPLATE_TYPES - ): - raise TypeError( - f"Type of '{fld.name}' should be one of {OUTPUT_TEMPLATE_TYPES} " - f"(not {fld.type}) because it has a value for output_file_template " - f"({mdata['output_file_template']!r})" - ) - if fld.default not in [attr.NOTHING, True, False]: - raise AttributeError( - f"default value ({fld.default!r}) should not be set together with " - f"output_file_template ({mdata['output_file_template']!r}) for " - f"'{fld.name}' field in {self}" - ) - # not allowing for default if the field is mandatory - if not fld.default == attr.NOTHING and mdata.get("mandatory"): - raise AttributeError( - f"default value ({fld.default!r}) should not be set when the field " - f"('{fld.name}') in {self}) is mandatory" - ) - # setting default if value not provided and default is available - if getattr(self, fld.name) is None: - if not fld.default == attr.NOTHING: - setattr(self, fld.name, fld.default) - - -@attr.s(auto_attribs=True, kw_only=True) -class ShellOutSpec: - """Output specification of a generic shell process.""" - - return_code: int - """The process' exit code.""" - stdout: str - """The process' standard output.""" - stderr: str - """The process' standard input.""" - - def collect_additional_outputs(self, inputs, output_dir, outputs): - from ..utils.typing import TypeParser - - """Collect additional outputs from shelltask output_spec.""" - additional_out = {} - for fld in attr_fields(self, exclude_names=("return_code", "stdout", "stderr")): - if not TypeParser.is_subclass( - fld.type, - ( - os.PathLike, - MultiOutputObj, - int, - float, - bool, - str, - list, - ), - ): - raise TypeError( - f"Support for {fld.type} type, required for '{fld.name}' in {self}, " - "has not been implemented in collect_additional_output" - ) - # assuming that field should have either default or metadata, but not both - input_value = getattr(inputs, fld.name, attr.NOTHING) - if input_value is not attr.NOTHING: - if TypeParser.contains_type(FileSet, fld.type): - if input_value is not False: - label = f"output field '{fld.name}' of {self}" - input_value = TypeParser(fld.type, label=label).coerce( - input_value - ) - additional_out[fld.name] = input_value - elif ( - fld.default is None or fld.default == attr.NOTHING - ) and not 
fld.metadata: # TODO: is it right? - raise AttributeError("File has to have default value or metadata") - elif fld.default != attr.NOTHING: - additional_out[fld.name] = self._field_defaultvalue(fld, output_dir) - elif fld.metadata: - if ( - fld.type in [int, float, bool, str, list] - and "callable" not in fld.metadata - ): - raise AttributeError( - f"{fld.type} has to have a callable in metadata" - ) - additional_out[fld.name] = self._field_metadata( - fld, inputs, output_dir, outputs - ) - return additional_out + Parameters + ---------- + task : Task[WorfklowDef] + The task whose outputs are being collected. - def generated_output_names(self, inputs, output_dir): - """Returns a list of all outputs that will be generated by the task. - Takes into account the task input and the requires list for the output fields. - TODO: should be in all Output specs? + Returns + ------- + outputs : Outputs + The outputs of the task """ - # checking the input (if all mandatory fields are provided, etc.) - inputs.check_fields_input_spec() - output_names = ["return_code", "stdout", "stderr"] - for fld in attr_fields(self, exclude_names=("return_code", "stdout", "stderr")): - if fld.type not in [File, MultiOutputFile, Directory]: - raise Exception("not implemented (collect_additional_output)") - # assuming that field should have either default or metadata, but not both - if ( - fld.default in (None, attr.NOTHING) and not fld.metadata - ): # TODO: is it right? - raise AttributeError("File has to have default value or metadata") - elif fld.default != attr.NOTHING: - output_names.append(fld.name) - elif ( - fld.metadata - and self._field_metadata( - fld, inputs, output_dir, outputs=None, check_existance=False - ) - != attr.NOTHING - ): - output_names.append(fld.name) - return output_names - - def _field_defaultvalue(self, fld, output_dir): - """Collect output file if the default value specified.""" - if not isinstance(fld.default, (str, Path)): - raise AttributeError( - f"{fld.name} is a File, so default value " - f"should be a string or a Path, " - f"{fld.default} provided" - ) - default = fld.default - if isinstance(default, str): - default = Path(default) - - default = output_dir / default - if "*" not in str(default): - if default.exists(): - return default - else: - raise AttributeError(f"file {default} does not exist") - else: - all_files = [Path(el) for el in glob(str(default.expanduser()))] - if len(all_files) > 1: - return all_files - elif len(all_files) == 1: - return all_files[0] - else: - raise AttributeError(f"no file matches {default.name}") - - def _field_metadata( - self, fld, inputs, output_dir, outputs=None, check_existance=True - ): - """Collect output file if metadata specified.""" - if self._check_requires(fld, inputs) is False: - return attr.NOTHING - - if "value" in fld.metadata: - return output_dir / fld.metadata["value"] - # this block is only run if "output_file_template" is provided in output_spec - # if the field is set in input_spec with output_file_template, - # than the field already should have value - elif "output_file_template" in fld.metadata: - value = template_update_single( - fld, inputs=inputs, output_dir=output_dir, spec_type="output" - ) - - if fld.type is MultiOutputFile and type(value) is list: - # TODO: how to deal with mandatory list outputs - ret = [] - for val in value: - val = Path(val) - if check_existance and not val.exists(): - ret.append(attr.NOTHING) - else: - ret.append(val) - return ret - else: - val = Path(value) - # checking if the file exists - if 
check_existance and not val.exists(): - # if mandatory raise exception - if "mandatory" in fld.metadata: - if fld.metadata["mandatory"]: - raise Exception( - f"mandatory output for variable {fld.name} does not exist" - ) - return attr.NOTHING - return val - elif "callable" in fld.metadata: - callable_ = fld.metadata["callable"] - if isinstance(callable_, staticmethod): - # In case callable is defined as a static method, - # retrieve the function wrapped in the descriptor. - callable_ = callable_.__func__ - call_args = inspect.getfullargspec(callable_) - call_args_val = {} - for argnm in call_args.args: - if argnm == "field": - call_args_val[argnm] = fld - elif argnm == "output_dir": - call_args_val[argnm] = output_dir - elif argnm == "inputs": - call_args_val[argnm] = inputs - elif argnm == "stdout": - call_args_val[argnm] = outputs["stdout"] - elif argnm == "stderr": - call_args_val[argnm] = outputs["stderr"] + outputs = cls._from_defaults() + # collecting outputs from tasks + output_wf = {} + lazy_field: lazy.LazyOutField + workflow: "Workflow" = task.return_values["workflow"] + exec_graph: "DiGraph[NodeExecution]" = task.return_values["exec_graph"] + nodes_dict = {n.name: n for n in exec_graph.nodes} + for name, lazy_field in attrs_values(workflow.outputs).items(): + try: + val_out = lazy_field._get_value(workflow=workflow, graph=exec_graph) + if isinstance(val_out, StateArray): + val_out = list(val_out) # implicitly combine state arrays + output_wf[name] = val_out + except (ValueError, AttributeError): + output_wf[name] = None + node: "NodeExecution" = nodes_dict[lazy_field._node.name] + # checking if the tasks has predecessors that raises error + if isinstance(node.errored, list): + raise ValueError(f"Tasks {node._errored} raised an error") else: - try: - call_args_val[argnm] = getattr(inputs, argnm) - except AttributeError: - raise AttributeError( - f"arguments of the callable function from {fld.name} " - f"has to be in inputs or be field or output_dir, " - f"but {argnm} is used" - ) - return callable_(**call_args_val) - else: - raise Exception( - f"Metadata for '{fld.name}', does not not contain any of the required fields " - f'("callable", "output_file_template" or "value"): {fld.metadata}.' - ) - - def _check_requires(self, fld, inputs): - """checking if all fields from the requires and template are set in the input - if requires is a list of list, checking if at least one list has all elements set - """ - from .helpers import ensure_list - - if "requires" in fld.metadata: - # if requires is a list of list it is treated as el[0] OR el[1] OR... 
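Both the legacy `_field_metadata` above and the new `ShellOutputs._resolve_value` later in this file bind a `callable` output's arguments by parameter name (`field`, `output_dir`, `stdout`, `stderr`, `inputs`, or any input field). A self-contained sketch of that introspection convention, with an invented callable and context dict:

```python
# Sketch of the name-based argument binding used for "callable" output fields;
# the context dict and the n_lines callable below are invented for illustration.
import inspect


def call_with_known_args(callable_, context):
    if isinstance(callable_, staticmethod):  # unwrap staticmethod-defined callables
        callable_ = callable_.__func__
    argspec = inspect.getfullargspec(callable_)
    missing = [a for a in argspec.args if a not in context]
    if missing:
        raise KeyError(
            f"callable arguments {missing} are not available in {sorted(context)}"
        )
    return callable_(**{a: context[a] for a in argspec.args})


def n_lines(stdout):  # user-supplied output callable
    return len(stdout.splitlines())


print(call_with_known_args(n_lines, {"stdout": "a\nb\nc", "stderr": "", "output_dir": "."}))  # 3
```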
- required_fields = ensure_list(fld.metadata["requires"]) - if all([isinstance(el, list) for el in required_fields]): - field_required_OR = required_fields - # if requires is a list of tuples/strings - I'm creating a 1-el nested list - elif all([isinstance(el, (str, tuple)) for el in required_fields]): - field_required_OR = [required_fields] - else: - raise Exception( - f"requires field can be a list of list, or a list " - f"of strings/tuples, but {fld.metadata['requires']} " - f"provided for {fld.name}" - ) - else: - field_required_OR = [[]] - - for field_required in field_required_OR: - # if the output has output_file_template field, - # adding all input fields from the template to requires - if "output_file_template" in fld.metadata: - template = fld.metadata["output_file_template"] - # if a template is a function it has to be run first with the inputs as the only arg - if callable(template): - template = template(inputs) - inp_fields = re.findall(r"{\w+}", template) - field_required += [ - el[1:-1] for el in inp_fields if el[1:-1] not in field_required - ] - - # it's a flag, of the field from the list is not in input it will be changed to False - required_found = True - for field_required in field_required_OR: - required_found = True - # checking if the input fields from requires have set values - for inp in field_required: - if isinstance(inp, str): # name of the input field - if not hasattr(inputs, inp): - raise Exception( - f"{inp} is not a valid input field, can't be used in requires" - ) - elif getattr(inputs, inp) in [attr.NOTHING, None]: - required_found = False - break - elif isinstance(inp, tuple): # (name, allowed values) - inp, allowed_val = inp[0], ensure_list(inp[1]) - if not hasattr(inputs, inp): - raise Exception( - f"{inp} is not a valid input field, can't be used in requires" + err_files = [(t.output_dir / "_error.pklz") for t in node.tasks] + err_files = [f for f in err_files if f.exists()] + if not err_files: + raise + raise ValueError( + f"Task {lazy_field._node.name!r} raised an error, full crash report is " + f"here: " + + ( + str(err_files[0]) + if len(err_files) == 1 + else "\n" + "\n".join(str(f) for f in err_files) ) - elif getattr(inputs, inp) not in allowed_val: - required_found = False - break - else: - raise Exception( - f"each element of the requires element should be a string or a tuple, " - f"but {inp} is found in {field_required}" ) - # if the specific list from field_required_OR has all elements set, no need to check more - if required_found: - break + return attrs.evolve(outputs, **output_wf) - if required_found: - return True - else: - return False +WorkflowOutputsType = ty.TypeVar("OutputType", bound=WorkflowOutputs) -@attr.s -class LazyInterface: - _task: "core.TaskBase" = attr.ib() - _attr_type: str - def __getattr__(self, name): - if name in ("_task", "_attr_type", "_field_names"): - raise AttributeError(f"{name} hasn't been set yet") - if name not in self._field_names: - raise AttributeError( - f"Task '{self._task.name}' has no {self._attr_type} attribute '{name}', " - "available: '" + "', '".join(self._field_names) + "'" - ) - type_ = self._get_type(name) - splits = self._get_task_splits() - combines = self._get_task_combines() - if combines and self._attr_type == "output": - # Add in any scalar splits referencing upstream splits, i.e. 
"_myupstreamtask", - # "_myarbitrarytask" - combined_upstreams = set() - if self._task.state: - for scalar in LazyField.sanitize_splitter( - self._task.state.splitter, strip_previous=False - ): - for field in scalar: - if field.startswith("_"): - node_name = field[1:] - if any(c.split(".")[0] == node_name for c in combines): - combines.update( - f for f in scalar if not f.startswith("_") - ) - combined_upstreams.update( - f[1:] for f in scalar if f.startswith("_") - ) - if combines: - # Wrap type in list which holds the combined items - type_ = ty.List[type_] - # Iterate through splits to remove any splits which are removed by the - # combiner - for splitter in copy(splits): - remaining = tuple( - s - for s in splitter - if not any( - (x in combines or x.split(".")[0] in combined_upstreams) - for x in s - ) - ) - if remaining != splitter: - splits.remove(splitter) - if remaining: - splits.add(remaining) - # Wrap the type in a nested StateArray type - if splits: - type_ = StateArray[type_] - lf_klass = LazyInField if self._attr_type == "input" else LazyOutField - return lf_klass[type_]( - name=self._task.name, - field=name, - type=type_, - splits=splits, - ) - - def _get_task_splits(self) -> ty.Set[ty.Tuple[ty.Tuple[str, ...], ...]]: - """Returns the states over which the inputs of the task are split""" - splitter = self._task.state.splitter if self._task.state else None - splits = set() - if splitter: - # Ensure that splits is of tuple[tuple[str, ...], ...] form - splitter = LazyField.sanitize_splitter(splitter) - if splitter: - splits.add(splitter) - for inpt in attr.asdict(self._task.inputs, recurse=False).values(): - if isinstance(inpt, LazyField): - splits.update(inpt.splits) - return splits - - def _get_task_combines(self) -> ty.Set[ty.Union[str, ty.Tuple[str, ...]]]: - """Returns the states over which the outputs of the task are combined""" - combiner = ( - self._task.state.combiner - if self._task.state is not None - else getattr(self._task, "fut_combiner", None) - ) - return set(combiner) if combiner else set() +@attrs.define(kw_only=True, auto_attribs=False, eq=False) +class WorkflowDef(TaskDef[WorkflowOutputsType]): + _task_type = "workflow" -class LazyIn(LazyInterface): - _attr_type = "input" + RESERVED_FIELD_NAMES = TaskDef.RESERVED_FIELD_NAMES + ("construct",) - def _get_type(self, name): - attr = next(t for n, t in self._task.input_spec.fields if n == name) - if attr is None: - return ty.Any - elif inspect.isclass(attr): - return attr - else: - return attr.type + _constructed = attrs.field(default=None, init=False, repr=False, eq=False) - @property - def _field_names(self): - return [field[0] for field in self._task.input_spec.fields] + def _run(self, task: "Task[WorkflowDef]", rerun: bool) -> None: + """Run the workflow.""" + task.submitter.expand_workflow(task, rerun) + async def _run_async(self, task: "Task[WorkflowDef]", rerun: bool) -> None: + """Run the workflow asynchronously.""" + await task.submitter.expand_workflow_async(task, rerun) -class LazyOut(LazyInterface): - _attr_type = "output" - - def _get_type(self, name): - try: - type_ = next(f[1] for f in self._task.output_spec.fields if f[0] == name) - except StopIteration: - type_ = ty.Any - else: - if not inspect.isclass(type_): - try: - type_ = type_.type # attrs _CountingAttribute - except AttributeError: - pass # typing._SpecialForm - return type_ + def construct(self) -> "Workflow": + from pydra.engine.core import Workflow - @property - def _field_names(self): - return self._task.output_names + ["all_"] + if 
self._constructed is not None: + return self._constructed + self._constructed = Workflow.construct(self) + return self._constructed -TypeOrAny = ty.Union[ty.Type[T], ty.Any] -Splitter = ty.Union[str, ty.Tuple[str, ...]] +RETURN_CODE_HELP = """The process' exit code.""" +STDOUT_HELP = """The standard output stream produced by the command.""" +STDERR_HELP = """The standard error stream produced by the command.""" -@attr.s(auto_attribs=True, kw_only=True) -class LazyField(ty.Generic[T]): - """Lazy fields implement promises.""" +@attrs.define(kw_only=True, auto_attribs=False, eq=False) +class ShellOutputs(TaskOutputs): + """Output definition of a generic shell process.""" - name: str - field: str - type: TypeOrAny - # Set of splitters that have been applied to the lazy field. Note that the splitter - # specifications are transformed to a tuple[tuple[str, ...], ...] form where the - # outer tuple is the outer product, the inner tuple are inner products (where either - # product can be of length==1) - splits: ty.FrozenSet[ty.Tuple[ty.Tuple[str, ...], ...]] = attr.field( - factory=frozenset, converter=frozenset - ) - cast_from: ty.Optional[ty.Type[ty.Any]] = None + BASE_NAMES = ["return_code", "stdout", "stderr"] - def __bytes_repr__(self, cache): - yield type(self).__name__.encode() - yield self.name.encode() - yield self.field.encode() + return_code: int = shell.out(name="return_code", type=int, help=RETURN_CODE_HELP) + stdout: str = shell.out(name="stdout", type=str, help=STDOUT_HELP) + stderr: str = shell.out(name="stderr", type=str, help=STDERR_HELP) - def cast(self, new_type: TypeOrAny) -> "LazyField": - """ "casts" the lazy field to a new type + @classmethod + def _from_task(cls, task: "Task[ShellDef]") -> Self: + """Collect the outputs of a shell process from a combination of the provided inputs, + the objects in the output directory, and the stdout and stderr of the process. Parameters ---------- - new_type : type - the type to cast the lazy-field to + inputs : ShellDef + The input definition of the shell process. + output_dir : Path + The directory where the process was run. + stdout : str + The standard output of the process. + stderr : str + The standard error of the process. + return_code : int + The exit code of the process. 
Returns ------- - cast_field : LazyField - a copy of the lazy field with the new type + outputs : ShellOutputs + The outputs of the shell process """ - return type(self)[new_type]( - name=self.name, - field=self.field, - type=new_type, - splits=self.splits, - cast_from=self.cast_from if self.cast_from else self.type, - ) + outputs = cls._from_defaults() + fld: shell.out + for fld in list_fields(cls): + if fld.name in ["return_code", "stdout", "stderr"]: + resolved_value = task.return_values[fld.name] + # Get the corresponding value from the inputs if it exists, which will be + # passed through to the outputs, to permit manual overrides + elif isinstance(fld, shell.outarg) and isinstance( + task.inputs[fld.name], Path + ): + resolved_value = task.inputs[fld.name] + elif is_set(fld.default): + resolved_value = cls._resolve_default_value(fld, task.output_dir) + else: + resolved_value = cls._resolve_value(fld, task) + # Set the resolved value + try: + setattr(outputs, fld.name, resolved_value) + except FileNotFoundError: + if is_optional(fld.type): + setattr(outputs, fld.name, None) + else: + raise ValueError( + f"file system path(s) provided to mandatory field {fld.name!r}, " + f"'{resolved_value}', does not exist, this is likely due to an " + f"error in the {task.name!r} task" + ) + return outputs + + # @classmethod + # def _from_defaults(cls) -> Self: + # """Create an output object from the default values of the fields""" + # defaults = {} + # for field in attrs_fields(cls): + # if isinstance(field.default, attrs.Factory): + # defaults[field.name] = field.default.factory() + # elif TypeParser.contains_type(FileSet, field.type): + # # Will be set by the templating code + # defaults[field.name] = attrs.NOTHING + # else: + # defaults[field.name] = field.default + + # return cls(**defaults) - def split(self, splitter: Splitter) -> "LazyField": - """ "Splits" the lazy field over an array of nodes by replacing the sequence type - of the lazy field with StateArray to signify that it will be "split" across + @classmethod + def _resolve_default_value(cls, fld: shell.out, output_dir: Path) -> ty.Any: + """Resolve path and glob expr default values relative to the output dir""" + default = fld.default + if fld.type is Path: + assert isinstance(default, Path) + if not default.is_absolute(): + default = output_dir.joinpath(default) + if "*" not in str(default): + if default.exists(): + return default + else: + raise FileNotFoundError(f"file {default} does not exist") + else: + all_files = [Path(el) for el in glob(default.expanduser())] + if len(all_files) > 1: + return all_files + elif len(all_files) == 1: + return all_files[0] + else: + raise FileNotFoundError(f"no file matches {default.name}") + return default - Parameters - ---------- - splitter : str or ty.Tuple[str, ...] 
or ty.List[str] - the splitter to append to the list of splitters + @classmethod + def _required_fields_satisfied(cls, fld: shell.out, inputs: "ShellDef") -> bool: + """checking if all fields from the requires and template are set in the input + if requires is a list of list, checking if at least one list has all elements set """ - from ..utils.typing import TypeParser # pylint: disable=import-outside-toplevel - - splits = self.splits | set([LazyField.sanitize_splitter(splitter)]) - # Check to see whether the field has already been split over the given splitter - if splits == self.splits: - return self - - # Modify the type of the lazy field to include the split across a state-array - inner_type, prev_split_depth = TypeParser.strip_splits(self.type) - assert prev_split_depth <= 1 - if inner_type is ty.Any: - type_ = StateArray[ty.Any] - elif TypeParser.matches_type(inner_type, list): - item_type = TypeParser.get_item_type(inner_type) - type_ = StateArray[item_type] + + if not fld.requires: + return True + + requirements: list[RequirementSet] + if fld.requires: + requirements = deepcopy(fld.requires) else: - raise TypeError( - f"Cannot split non-sequence field {self} of type {inner_type}" - ) - if prev_split_depth: - type_ = StateArray[type_] - return type(self)[type_]( - name=self.name, - field=self.field, - type=type_, - splits=splits, - ) + requirements = [RequirementSet()] + + # if the output has output_file_template field, add in all input fields from + # the template to requires + if isinstance(fld, shell.outarg) and fld.path_template: + # if a template is a function it has to be run first with the inputs as the only arg + if callable(fld.path_template): + template = fld.path_template(inputs) + else: + template = fld.path_template + inp_fields = re.findall(r"{(\w+)(?:\:[^\}]+)?}", template) + for req in requirements: + req += inp_fields + + # Check to see if any of the requirement sets are satisfied + return any(rs.satisfied(inputs) for rs in requirements) @classmethod - def sanitize_splitter( - cls, splitter: Splitter, strip_previous: bool = True - ) -> ty.Tuple[ty.Tuple[str, ...], ...]: - """Converts the splitter spec into a consistent tuple[tuple[str, ...], ...] 
form - used in LazyFields""" - if isinstance(splitter, str): - splitter = (splitter,) - if isinstance(splitter, tuple): - splitter = (splitter,) # type: ignore - else: - assert isinstance(splitter, list) - # convert to frozenset to differentiate from tuple, yet still be hashable - # (NB: order of fields in list splitters aren't relevant) - splitter = tuple((s,) if isinstance(s, str) else s for s in splitter) - # Strip out fields starting with "_" designating splits in upstream nodes - if strip_previous: - stripped = tuple( - tuple(f for f in i if not f.startswith("_")) for i in splitter + def _resolve_value( + cls, + fld: "shell.out", + task: "Task[DefType]", + ) -> ty.Any: + """Collect output file if metadata specified.""" + from pydra.design import shell + + if not cls._required_fields_satisfied(fld, task.definition): + return None + if isinstance(fld, shell.outarg) and fld.path_template: + return template_update_single( + fld, + definition=task.definition, + output_dir=task.output_dir, + spec_type="output", ) - splitter = tuple(s for s in stripped if s) # type: ignore - return splitter # type: ignore - - def _apply_cast(self, value): - """\"Casts\" the value from the retrieved type if a cast has been applied to - the lazy-field""" - from pydra.utils.typing import TypeParser + assert fld.callable, ( + f"Output field '{fld.name}', does not not contain any of the required fields " + f'("callable", "output_file_template" or "value"): {fld}.' + ) + callable_ = fld.callable + if isinstance(fld.callable, staticmethod): + # In case callable is defined as a static method, + # retrieve the function wrapped in the descriptor. + callable_ = fld.callable.__func__ + call_args = inspect.getfullargspec(callable_) + call_args_val = {} + for argnm in call_args.args: + if argnm == "field": + call_args_val[argnm] = fld + elif argnm == "output_dir": + call_args_val[argnm] = task.output_dir + elif argnm == "executable": + call_args_val[argnm] = task.definition.executable + elif argnm == "inputs": + call_args_val[argnm] = task.inputs + elif argnm == "stdout": + call_args_val[argnm] = task.return_values["stdout"] + elif argnm == "stderr": + call_args_val[argnm] = task.return_values["stderr"] + elif argnm == "self": + pass # If the callable is a class + else: + try: + call_args_val[argnm] = task.inputs[argnm] + except KeyError as e: + e.add_note( + f"arguments of the callable function from {fld.name!r} " + f"has to be in inputs or be field or output_dir, " + f"but {argnm!r} is used" + ) + raise + return callable_(**call_args_val) - if self.cast_from: - assert TypeParser.matches(value, self.cast_from) - value = self.type(value) - return value +ShellOutputsType = ty.TypeVar("OutputType", bound=ShellOutputs) -class LazyInField(LazyField[T]): - attr_type = "input" - def get_value( - self, wf: "pydra.Workflow", state_index: ty.Optional[int] = None - ) -> ty.Any: - """Return the value of a lazy field. 
+@state_array_support +def additional_args_converter(value: ty.Any) -> list[str]: + """Convert additional arguments to a list of strings.""" + if isinstance(value, str): + return shlex.split(value) + if not isinstance(value, ty.Sequence): + return [value] + return list(value) - Parameters - ---------- - wf : Workflow - the workflow the lazy field references - state_index : int, optional - the state index of the field to access - Returns - ------- - value : Any - the resolved value of the lazy-field - """ - from ..utils.typing import TypeParser # pylint: disable=import-outside-toplevel +@attrs.define(kw_only=True, auto_attribs=False, eq=False) +class ShellDef(TaskDef[ShellOutputsType]): - value = getattr(wf.inputs, self.field) - if TypeParser.is_subclass(self.type, StateArray) and not wf._pre_split: - _, split_depth = TypeParser.strip_splits(self.type) + _task_type = "shell" - def apply_splits(obj, depth): - if depth < 1: - return obj - return StateArray[self.type](apply_splits(i, depth - 1) for i in obj) + BASE_NAMES = ["additional_args"] - value = apply_splits(value, split_depth) - value = self._apply_cast(value) - return value + additional_args: list[str | File] = shell.arg( + name="additional_args", + default=attrs.Factory(list), + converter=additional_args_converter, + type=list[str | File], + sep=" ", + help="Additional free-form arguments to append to the end of the command.", + ) + RESERVED_FIELD_NAMES = TaskDef.RESERVED_FIELD_NAMES + ("cmdline",) -class LazyOutField(LazyField[T]): - attr_type = "output" + def _run(self, task: "Task[ShellDef]", rerun: bool = True) -> None: + """Run the shell command.""" + task.return_values = task.environment.execute(task) - def get_value( - self, wf: "pydra.Workflow", state_index: ty.Optional[int] = None - ) -> ty.Any: - """Return the value of a lazy field. + @property + def cmdline(self) -> str: + """The equivalent command line that would be submitted if the task were run on + the current working directory.""" + # Skip the executable, which can be a multi-part command, e.g. 'docker run'. + values = attrs_values(self) + values.update(template_update(self, output_dir=Path.cwd())) + cmd_args = self._command_args(values=values) + cmdline = cmd_args[0] + for arg in cmd_args[1:]: + # If there are spaces in the arg, and it is not enclosed by matching + # quotes, add quotes to escape the space. 
Not sure if this should + # be expanded to include other special characters apart from spaces + if " " in arg: + cmdline += " '" + arg + "'" + else: + cmdline += " " + arg + return cmdline + + def _command_args(self, values: dict[str, ty.Any]) -> list[str]: + """Get command line arguments""" + self._check_resolved() + self._check_rules() + # Drop none/empty values and optional path fields that are set to false + values = copy(values) # Create a copy so we can drop items from the dictionary + for field in list_fields(self): + fld_value = values[field.name] + if fld_value is None or (is_multi_input(field.type) and fld_value == []): + del values[field.name] + if is_fileset_or_union(field.type) and type(fld_value) is bool: + del values[field.name] + # Drop special fields that are added separately + del values["executable"] + del values["additional_args"] + # Add executable + pos_args = [ + self._command_shelltask_executable(field, self.executable), + ] # list for (position, command arg) + positions_provided = [0] + fields = {f.name: f for f in list_fields(self)} + for field_name in values: + pos_val = self._command_pos_args( + field=fields[field_name], + values=values, + positions_provided=positions_provided, + ) + if pos_val: + pos_args.append(pos_val) + # Sort command and arguments by position + cmd_args = position_sort(pos_args) + # pos_args values are each a list of arguments, so concatenate lists after sorting + command_args = sum(cmd_args, []) + # Append additional arguments to the end of the command + command_args += self.additional_args + return command_args + + def _command_shelltask_executable( + self, field: shell.arg, value: ty.Any + ) -> tuple[int, ty.Any]: + """Returning position and value for executable ShellTask input""" + pos = 0 # executable should be the first el. of the command + assert value + return pos, ensure_list(value, tuple2list=True) + + def _command_shelltask_args( + self, field: shell.arg, value: ty.Any + ) -> tuple[int, ty.Any]: + """Returning position and value for args ShellTask input""" + pos = -1 # assuming that args is the last el. of the command + if value is None: + return None + else: + return pos, ensure_list(value, tuple2list=True) + + def _command_pos_args( + self, + field: shell.arg, + values: dict[str, ty.Any], + positions_provided: list[str], + ) -> tuple[int, ty.Any]: + """ + Checking all additional input fields, setting pos to None, if position not set. + Creating a list with additional parts of the command that comes from + the specific field. Parameters ---------- - wf : Workflow - the workflow the lazy field references - state_index : int, optional - the state index of the field to access - - Returns - ------- - value : Any - the resolved value of the lazy-field """ - from ..utils.typing import TypeParser # pylint: disable=import-outside-toplevel - - node = getattr(wf, self.name) - result = node.result(state_index=state_index) - if result is None: - raise RuntimeError( - f"Could not find results of '{node.name}' node in a sub-directory " - f"named '{node.checksum}' in any of the cache locations.\n" - + "\n".join(str(p) for p in set(node.cache_locations)) - + f"\n\nThis is likely due to hash changes in '{self.name}' node inputs. 
" - f"Current values and hashes: {node.inputs}, " - f"{node.inputs._hashes}\n\n" - "Set loglevel to 'debug' in order to track hash changes " - "throughout the execution of the workflow.\n\n " - "These issues may have been caused by `bytes_repr()` methods " - "that don't return stable hash values for specific object " - "types across multiple processes (see bytes_repr() " - '"singledispatch "function in pydra/utils/hash.py).' - "You may need to write specific `bytes_repr()` " - "implementations (see `pydra.utils.hash.register_serializer`) or a " - "`__bytes_repr__()` dunder methods to handle one or more types in " - "your interface inputs." - ) - _, split_depth = TypeParser.strip_splits(self.type) - - def get_nested_results(res, depth: int): - if isinstance(res, list): - if not depth: - val = [r.get_output_field(self.field) for r in res] - else: - val = StateArray[self.type]( - get_nested_results(res=r, depth=depth - 1) for r in res - ) - else: - if res.errored: - raise ValueError( - f"Cannot retrieve value for {self.field} from {self.name} as " - "the node errored" - ) - val = res.get_output_field(self.field) - if depth and not wf._pre_split: - assert isinstance(val, ty.Sequence) and not isinstance(val, str) - val = StateArray[self.type](val) - return val - - value = get_nested_results(result, depth=split_depth) - value = self._apply_cast(value) - return value + if field.argstr is None and field.formatter is None: + # assuming that input that has no argstr is not used in the command, + # or a formatter is not provided too. + return None + if field.position is not None: + if not isinstance(field.position, int): + raise Exception( + f"position should be an integer, but {field.position} given" + ) + # checking if the position is not already used + if field.position in positions_provided: + raise Exception( + f"{field.name} can't have provided position, {field.position} is already used" + ) + positions_provided.append(field.position) -class StateArray(ty.List[T]): - """an array of values from, or to be split over in an array of nodes (see TaskBase.split()), - multiple nodes of the same task. Used in type-checking to differentiate between list - types and values for multiple nodes - """ + value = values[field.name] - def __repr__(self): - return f"{type(self).__name__}(" + ", ".join(repr(i) for i in self) + ")" + if field.readonly and type(value) is not bool and value is not attrs.NOTHING: + raise Exception(f"{field.name} is read only, the value can't be provided") + elif value is None and not field.readonly and field.formatter is None: + return None + cmd_add = [] + # formatter that creates a custom command argument + # it can take the value of the field, all inputs, or the value of other fields. 
+ tp = optional_type(field.type) if is_optional(field.type) else field.type + if field.formatter: + call_args = inspect.getfullargspec(field.formatter) + call_args_val = {} + for argnm in call_args.args: + if argnm == "field": + call_args_val[argnm] = field + elif argnm == "inputs": + call_args_val[argnm] = values + else: + if argnm in values: + call_args_val[argnm] = values[argnm] + else: + raise AttributeError( + f"arguments of the formatter function from {field.name} " + f"has to be in inputs or be field, but {argnm} is used" + ) + cmd_el_str = field.formatter(**call_args_val) + cmd_el_str = cmd_el_str.strip().replace(" ", " ") + if cmd_el_str != "": + cmd_add += split_cmd(cmd_el_str) + elif tp is bool and "{" not in field.argstr: + # if value is simply True the original argstr is used, + # if False, nothing is added to the command. + if value is True: + cmd_add.append(field.argstr) + elif is_multi_input(tp) or tp is MultiOutputObj or tp is MultiOutputFile: + # if the field is MultiInputObj, it is used to create a list of arguments + for val in value or []: + split_values = copy(values) + split_values[field.name] = val + cmd_add += self._format_arg(field, split_values) + else: + cmd_add += self._format_arg(field, values) + return field.position, cmd_add + + def _format_arg(self, field: shell.arg, values: dict[str, ty.Any]) -> list[str]: + """Returning arguments used to specify the command args for a single inputs""" + value = values[field.name] + if ( + field.argstr.endswith("...") + and isinstance(value, ty.Iterable) + and not isinstance(value, (str, bytes)) + ): + argstr = field.argstr.replace("...", "") + # if argstr has a more complex form, with "{input_field}" + if "{" in argstr and "}" in argstr: + argstr_formatted_l = [] + for val in value: + split_values = copy(values) + split_values[field.name] = val + argstr_f = argstr_formatting(argstr, split_values) + argstr_formatted_l.append(f" {argstr_f}") + cmd_el_str = field.sep.join(argstr_formatted_l) + else: # argstr has a simple form, e.g. "-f", or "--f" + cmd_el_str = field.sep.join([f" {argstr} {val}" for val in value]) + else: + # in case there are ... when input is not a list + argstr = field.argstr.replace("...", "") + if isinstance(value, ty.Iterable) and not isinstance(value, (str, bytes)): + cmd_el_str = field.sep.join([str(val) for val in value]) + value = cmd_el_str + # if argstr has a more complex form, with "{input_field}" + if "{" in argstr and "}" in argstr: + cmd_el_str = argstr.replace(f"{{{field.name}}}", str(value)) + cmd_el_str = argstr_formatting(cmd_el_str, values) + else: # argstr has a simple form, e.g. "-f", or "--f" + if value: + cmd_el_str = f"{argstr} {value}" + else: + cmd_el_str = "" + return split_cmd(cmd_el_str) -def donothing(*args, **kwargs): - return None + def _rule_violations(self) -> list[str]: + errors = super()._rule_violations() + # if there is a value that has to be updated (e.g. single value from a list) + # getting all fields that should be formatted, i.e. {field_name}, ... 
+ fields = list_fields(self) + available_template_names = [f.name for f in fields] + ["field", "inputs"] + for field in fields: + if field.argstr: + if unrecognised := [ + f + for f in parse_format_string(field.argstr) + if f not in available_template_names + ]: + errors.append( + f"Unrecognised field names in the argstr of {field.name} " + f"({field.argstr}): {unrecognised}" + ) + if getattr(field, "path_template", None): + if unrecognised := [ + f + for f in fields_in_formatter(field.path_template) + if f not in available_template_names + ]: + errors.append( + f"Unrecognised field names in the path_template of {field.name} " + f"({field.path_template}): {unrecognised}" + ) -@attr.s(auto_attribs=True, kw_only=True) -class TaskHook: - """Callable task hooks.""" + return errors - pre_run_task: ty.Callable = donothing - post_run_task: ty.Callable = donothing - pre_run: ty.Callable = donothing - post_run: ty.Callable = donothing + DEFAULT_COPY_COLLATION = FileSet.CopyCollation.adjacent - def __setattr__(self, attr, val): - if attr not in ["pre_run_task", "post_run_task", "pre_run", "post_run"]: - raise AttributeError("Cannot set unknown hook") - super().__setattr__(attr, val) - def reset(self): - for val in ["pre_run_task", "post_run_task", "pre_run", "post_run"]: - setattr(self, val, donothing) +def split_cmd(cmd: str | None): + """Splits a shell command line into separate arguments respecting quotes + Parameters + ---------- + cmd : str + Command line string or part thereof -def path_to_string(value): - """Convert paths to strings.""" - if isinstance(value, Path): - value = str(value) - elif isinstance(value, list) and len(value) and isinstance(value[0], Path): - value = [str(val) for val in value] - return value + Returns + ------- + str + the command line string split into process args + """ + if cmd is None: + return [] + # Check whether running on posix or Windows system + on_posix = platform.system() != "Windows" + args = shlex.split(cmd, posix=on_posix) + cmd_args = [] + for arg in args: + match = re.match("(['\"])(.*)\\1$", arg) + if match: + cmd_args.append(match.group(2)) + else: + cmd_args.append(arg) + return cmd_args -from . import core # noqa +def argstr_formatting(argstr: str, values: dict[str, ty.Any]): + """formatting argstr that have form {field_name}, + using values from inputs and updating with value_update if provided + """ + # if there is a value that has to be updated (e.g. single value from a list) + # getting all fields that should be formatted, i.e. {field_name}, ... + inp_fields = parse_format_string(argstr) + # formatting string based on the val_dict + argstr_formatted = argstr.format(**{n: values.get(n, "") for n in inp_fields}) + # removing extra commas and spaces after removing the field that have NOTHING + argstr_formatted = ( + argstr_formatted.replace("[ ", "[") + .replace(" ]", "]") + .replace("[,", "[") + .replace(",]", "]") + .strip() + ) + return argstr_formatted diff --git a/pydra/engine/state.py b/pydra/engine/state.py index befbf86b9d..b186a60097 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -3,15 +3,17 @@ from copy import deepcopy import itertools from functools import reduce - +import typing as ty from . import helpers_state as hlpst from .helpers import ensure_list -from .specs import BaseSpec +from pydra.utils.typing import StateArray, TypeParser -# TODO: move to State op = {".": zip, "*": itertools.product} +OutputsType = ty.TypeVar("OutputsType") + + class State: """ A class that specifies a State of all tasks. 
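Back in specs.py, the helpers `split_cmd` and `argstr_formatting` are where rendered argument strings are turned back into process arguments while preserving quoted values. A quick standalone demonstration of the quote-respecting split (re-implemented here with `shlex` only, so the snippet runs in isolation; the real helper additionally strips residual quote pairs when not on POSIX):

```python
# Standalone demonstration of the quote-respecting splitting that split_cmd()
# performs; shlex is used directly so this runs on its own.
import platform
import shlex


def split_like_split_cmd(cmd):
    if cmd is None:
        return []
    return shlex.split(cmd, posix=platform.system() != "Windows")


print(split_like_split_cmd("-o 'my output file.txt' --verbose"))
# ['-o', 'my output file.txt', '--verbose']  (on POSIX systems)
```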
@@ -77,7 +79,14 @@ class State: """ - def __init__(self, name, splitter=None, combiner=None, other_states=None): + def __init__( + self, + name, + splitter=None, + combiner=None, + cont_dim=None, + other_states=None, + ): """ Initialize a state. @@ -99,6 +108,9 @@ def __init__(self, name, splitter=None, combiner=None, other_states=None): self.splitter = splitter # temporary combiner self.combiner = combiner + self.cont_dim = cont_dim or {} + self._inner_cont_dim = {} + self._inputs_ind = None # if other_states, the connections have to be updated if self.other_states: self.update_connections() @@ -110,6 +122,101 @@ def __str__(self): f"and combiner: {self.combiner}" ) + @property + def names(self): + """Return the names of the states.""" + previous_states_keys = { + f"_{v.name}": v.keys_final for v in self.inner_inputs.values() + } + names = [] + # iterating splitter_rpn + for token in self.splitter_rpn: + if token in [".", "*"]: # token is one of the input var + continue + # adding variable to the stack + if token.startswith("_"): + new_keys = previous_states_keys[token] + names += new_keys + else: + names.append(token) + return names + + def depth(self, before_combine: bool = False) -> int: + """Return the number of splits of the state, i.e. the number nested + state arrays to wrap around the type of lazy out fields + + Parameters + ---------- + before_combine : :obj:`bool` + if True, the depth is after combining the fields, otherwise it is before + any combinations + + Returns + ------- + int + number of splits in the state (i.e. linked splits only add 1) + """ + + # replace field names with 1 or 0 (1 if the field is included in the state) + include_rpn = [ + ( + s + if s in [".", "*"] + else (1 if before_combine else int(s not in self.combiner)) + ) + for s in self.splitter_rpn + ] + + stack = [] + for opr in include_rpn: + if opr == ".": + assert len(stack) >= 2 + opr1 = stack.pop() + opr2 = stack.pop() + stack.append(opr1 and opr2) + elif opr == "*": + assert len(stack) >= 2 + stack.append(stack.pop() + stack.pop()) + else: + stack.append(opr) + assert len(stack) == 1 + return stack[0] + + def nest_output_type(self, type_: type) -> type: + """Nests a type of an output field in a combination of lists and state-arrays + based on the state's splitter and combiner + + Parameters + ---------- + type_ : type + the type of the output field + + Returns + ------- + type + the nested type of the output field + """ + + state_array_depth = self.depth() + + # If there is a combination, it will get flattened into a single list + if self.depth(before_combine=True) > state_array_depth: + type_ = list[type_] + + # Nest the uncombined state arrays around the type + for _ in range(state_array_depth): + type_ = StateArray[type_] + return type_ + + @classmethod + def combine_state_arrays(cls, type_: type) -> type: + """Collapses (potentially nested) state array(s) into a single list""" + if TypeParser.get_origin(type_) is StateArray: + # Implicitly combine any remaining uncombined states into a single + # list + type_ = list[TypeParser.strip_splits(type_)[0]] + return type_ + @property def splitter(self): """Get the splitter of the state.""" @@ -190,6 +297,17 @@ def current_splitter(self): else: return self.splitter + @property + def inputs_ind(self): + """dictionary for every state that contains indices for all task inputs + (i.e. 
inputs that are relevant for current task, can be outputs from previous nodes) + """ + if self._inputs_ind is None: + raise RuntimeError( + "inputs_ind is not set, please run prepare_states() on the state first" + ) + return self._inputs_ind + @current_splitter.setter def current_splitter(self, current_splitter): self._current_splitter = current_splitter @@ -737,7 +855,11 @@ def combiner_validation(self): if set(self._combiner) - set(self.splitter_rpn): raise hlpst.PydraStateError("all combiners have to be in the splitter") - def prepare_states(self, inputs, cont_dim=None): + def prepare_states( + self, + inputs: dict[str, ty.Any], + cont_dim: dict[str, int] | None = None, + ): """ Prepare a full list of state indices and state values. @@ -746,32 +868,17 @@ def prepare_states(self, inputs, cont_dim=None): State Values specific elements from inputs that can be used running interfaces - - Parameters - ---------- - inputs : :obj:`dict` - inputs of the task - cont_dim : :obj:`dict` or `None` - container's dimensions for a specific input's fields """ # checking if splitter and combiner have valid forms self.splitter_validation() self.combiner_validation() self.set_input_groups() - # container dimension for each input, specifies how nested the input is - if cont_dim: - self.cont_dim = cont_dim - else: - self.cont_dim = {} - if isinstance(inputs, BaseSpec): - self.inputs = hlpst.inputs_types_to_dict(self.name, inputs) - else: - self.inputs = inputs + self.inputs = inputs + if not self.cont_dim: + self.cont_dim = cont_dim or {} if self.other_states: + st: State for nm, (st, _) in self.other_states.items(): - # I think now this if is never used - if not hasattr(st, "states_ind"): - st.prepare_states(self.inputs, cont_dim=cont_dim) self.inputs.update(st.inputs) self.cont_dim.update(st.cont_dim) @@ -892,7 +999,7 @@ def prepare_inputs(self): """ if not self.other_states: - self.inputs_ind = self.states_ind + self._inputs_ind = self.states_ind else: # elements from the current node (the current part of the splitter) if self.current_splitter_rpn: @@ -943,11 +1050,11 @@ def prepare_inputs(self): inputs_ind = [] # iter_splits using inputs from current state/node - self.inputs_ind = list(hlpst.iter_splits(inputs_ind, keys_inp)) + self._inputs_ind = list(hlpst.iter_splits(inputs_ind, keys_inp)) # removing elements that are connected to inner splitter # TODO - add tests to test_workflow.py (not sure if we want to remove it) for el in connected_to_inner: - [dict.pop(el) for dict in self.inputs_ind] + [dict.pop(el) for dict in self._inputs_ind] def splits(self, splitter_rpn): """ @@ -1091,3 +1198,34 @@ def _single_op_splits(self, op_single): val = op["*"](val_ind) keys = [op_single] return val, keys + + def _get_element(self, value: ty.Any, field_name: str, ind: int) -> ty.Any: + """ + Extracting element of the inputs taking into account + container dimension of the specific element that can be set in self.state.cont_dim. + If input name is not in cont_dim, it is assumed that the input values has + a container dimension of 1, so only the most outer dim will be used for splitting. 
+ + Parameters + ---------- + value : Any + inputs of the task + field_name : str + name of the input field + ind : int + index of the element + + Returns + ------- + Any + specific element of the input field + """ + if f"{self.name}.{field_name}" in self.cont_dim: + return list( + hlpst.flatten( + ensure_list(value), + max_depth=self.cont_dim[f"{self.name}.{field_name}"], + ) + )[ind] + else: + return value[ind] diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index fe3e598c21..b58fd03a4c 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -3,206 +3,384 @@ import asyncio import typing as ty import pickle -from uuid import uuid4 +import os +from pathlib import Path +from tempfile import mkdtemp +from copy import copy, deepcopy +from datetime import datetime +from collections import defaultdict +import attrs from .workers import Worker, WORKERS -from .core import is_workflow -from .helpers import get_open_loop, load_and_run_async -from ..utils.hash import PersistentCache - +from .graph import DiGraph +from .helpers import ( + get_open_loop, + list_fields, + attrs_values, +) +from pydra.utils.hash import PersistentCache +from pydra.utils.typing import StateArray +from pydra.engine.lazy import LazyField +from .audit import Audit +from .core import Task +from pydra.utils.messenger import AuditFlag, Messenger +from pydra.utils import default_run_cache_dir +from pydra.design import workflow +from .state import State import logging logger = logging.getLogger("pydra.submitter") +if ty.TYPE_CHECKING: + from .node import Node + from .specs import WorkflowDef, TaskDef, TaskOutputs, TaskHooks, Result + from .core import Workflow + from .environments import Environment -# TODO: runnable in init or run -class Submitter: - """Send a task to the execution backend.""" - def __init__(self, plugin: ty.Union[str, ty.Type[Worker]] = "cf", **kwargs): - """ - Initialize task submission. +DefType = ty.TypeVar("DefType", bound="TaskDef") +OutputType = ty.TypeVar("OutputType", bound="TaskOutputs") - Parameters - ---------- - plugin : :obj:`str` or :obj:`ty.Type[pydra.engine.core.Worker]` - Either the identifier of the execution backend or the worker class itself. - Default is ``cf`` (Concurrent Futures). - **kwargs - Additional keyword arguments to pass to the worker. +# Used to flag development mode of Audit +develop = False - """ +WORKER_KWARG_FAIL_NOTE = "Attempting to instantiate worker submitter" + + +class Submitter: + """Send a task to the execution backend. + + Parameters + ---------- + cache_dir : os.PathLike, optional + Cache directory where the working directory/results for the task will be + stored, by default None + worker : str or Worker, optional + The worker to use, by default "cf" + environment: Environment, optional + The execution environment to use, by default None + cache_locations : list[os.PathLike], optional + Alternate cache locations to check for pre-computed results, by default None + audit_flags : AuditFlag, optional + Auditing configuration, by default AuditFlag.NONE + messengers : list, optional + Messengers, by default None + messenger_args : dict, optional + Messenger arguments, by default None + clean_stale_locks : bool, optional + Whether to clean stale lock files, i.e. lock files that were created before the + start of the current run. Don't set if using a global cache where there are + potentially multiple workflows that are running concurrently. 
By default (None), + lock files will be cleaned if the *debug* worker is used + **kwargs : dict + Keyword arguments to pass on to the worker initialisation + """ + + cache_dir: os.PathLike + worker: Worker + environment: "Environment | None" + cache_locations: list[os.PathLike] + audit_flags: AuditFlag + messengers: ty.Iterable[Messenger] + messenger_args: dict[str, ty.Any] + clean_stale_locks: bool + run_start_time: datetime | None + + def __init__( + self, + /, + cache_dir: os.PathLike | None = None, + worker: str | ty.Type[Worker] | Worker | None = "debug", + environment: "Environment | None" = None, + cache_locations: list[os.PathLike] | None = None, + audit_flags: AuditFlag = AuditFlag.NONE, + messengers: ty.Iterable[Messenger] | None = None, + messenger_args: dict[str, ty.Any] | None = None, + clean_stale_locks: bool | None = None, + **kwargs, + ): + + from pydra.engine.environments import Native + + if worker is None: + worker = "debug" + + from . import check_latest_version + + if Task._etelemetry_version_data is None: + Task._etelemetry_version_data = check_latest_version() + + self.audit = Audit( + audit_flags=audit_flags, + messengers=messengers, + messenger_args=messenger_args, + develop=develop, + ) + if cache_dir is None: + cache_dir = default_run_cache_dir + cache_dir = Path(cache_dir).resolve() + cache_dir.mkdir(parents=True, exist_ok=True) + + self.cache_dir = cache_dir + self.cache_locations = cache_locations + self.environment = environment if environment is not None else Native() self.loop = get_open_loop() self._own_loop = not self.loop.is_running() - if isinstance(plugin, str): - self.plugin = plugin - try: - worker_cls = WORKERS[self.plugin] - except KeyError: - raise NotImplementedError(f"No worker for '{self.plugin}' plugin") + if isinstance(worker, Worker): + self._worker = worker + self.worker_name = worker.plugin_name else: + if isinstance(worker, str): + self.worker_name = worker + try: + worker_cls = WORKERS[self.worker_name] + except KeyError: + raise NotImplementedError( + f"No worker for '{self.worker_name}' plugin" + ) + else: + try: + self.worker_name = worker.plugin_name + except AttributeError: + raise ValueError( + "Worker class must have a 'plugin_name' str attribute" + ) + worker_cls = worker try: - self.plugin = plugin.plugin_name - except AttributeError: - raise ValueError("Worker class must have a 'plugin_name' str attribute") - worker_cls = plugin - self.worker = worker_cls(**kwargs) - self.worker.loop = self.loop - - def __call__(self, runnable, cache_locations=None, rerun=False, environment=None): - """Submitter run function.""" - if cache_locations is not None: - runnable.cache_locations = cache_locations - self.loop.run_until_complete( - self.submit_from_call(runnable, rerun, environment) + self._worker = worker_cls(**kwargs) + except TypeError as e: + e.add_note(WORKER_KWARG_FAIL_NOTE) + raise + self.run_start_time = None + self.clean_stale_locks = ( + clean_stale_locks + if clean_stale_locks is not None + else (self.worker_name == "debug") ) - PersistentCache().clean_up() - return runnable.result() + self.worker_kwargs = kwargs + self._worker.loop = self.loop + + @property + def worker(self): + if self._worker is None: + raise RuntimeError( + "Cannot access worker of unpickeld submitter (typically in subprocess)" + ) + return self._worker - async def submit_from_call(self, runnable, rerun, environment): - """ - This coroutine should only be called once per Submitter call, - and serves as the bridge between sync/async lands. 
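+
+    # Example usage (a sketch; the worker name, cache_dir and task name are
+    # illustrative):
+    #
+    #     with Submitter(worker="cf", cache_dir="/tmp/pydra-cache") as sub:
+    #         result = sub(my_task_def)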
+ def __call__( + self, + task_def: "TaskDef[OutputType]", + hooks: "TaskHooks | None" = None, + raise_errors: bool | None = None, + rerun: bool = False, + ) -> "Result[OutputType]": + """Submitter run function. - There are 4 potential paths based on the type of runnable: - 0) Workflow has a different plugin than a submitter - 1) Workflow without State - 2) Task without State - 3) (Workflow or Task) with State + Parameters + ---------- + task_def : :obj:`~pydra.engine.specs.TaskDef` + The task definition to run + hooks : :obj:`~pydra.engine.specs.TaskHooks`, optional + Task hooks, callable functions called as the task is setup and torn down, + by default no functions are called at the hooks + raise_errors : bool, optional + Whether to raise errors, by default True if the 'debug' worker is used, + otherwise False + rerun : bool, optional + Whether to force the re-computation of the task results even if existing + results are found, by default False - Once Python 3.10 is the minimum, this should probably be refactored into using - structural pattern matching. + Returns + ------- + result : Any + The result of the task """ - if is_workflow(runnable): # TODO: env to wf - # connect and calculate the checksum of the graph before running - runnable._connect_and_propagate_to_tasks(override_task_caches=True) - # 0 - if runnable.plugin and runnable.plugin != self.plugin: - # if workflow has a different plugin it's treated as a single element - await self.worker.run_el(runnable, rerun=rerun) - # 1 - if runnable.state is None: - await runnable._run(self, rerun=rerun) - # 3 - else: - await self.expand_runnable(runnable, wait=True, rerun=rerun) - runnable._reset() - else: - # 2 - if runnable.state is None: - # run_el should always return a coroutine - await self.worker.run_el(runnable, rerun=rerun, environment=environment) - # 3 - else: - await self.expand_runnable(runnable, wait=True, rerun=rerun) # TODO - return True + from pydra.engine.environments import Environment - async def expand_runnable(self, runnable, wait=False, rerun=False): - """ - This coroutine handles state expansion. + if raise_errors is None: + raise_errors = self.worker_name == "debug" + if not isinstance(raise_errors, bool): + raise TypeError( + f"'raise_errors' must be a boolean or None, not {type(raise_errors)}" + ) - Removes any states from `runnable`. If `wait` is - set to False (default), aggregates all worker - execution coroutines and returns them. If `wait` is - True, waits for all coroutines to complete / error - and returns None. + task_def._check_rules() + # If the outer task is split, create an implicit workflow to hold the split nodes + if task_def._splitter: + from pydra.engine.specs import TaskDef - Parameters - ---------- - runnable : pydra Task - Task instance (`Task`, `Workflow`) - wait : bool (False) - Await all futures before completing + state = State( + name="outer_split", + splitter=deepcopy(task_def._splitter), + combiner=deepcopy(task_def._combiner), + cont_dim=deepcopy(task_def._cont_dim), + ) - Returns - ------- - futures : set or None - Coroutines for :class:`~pydra.engine.core.TaskBase` execution. 
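+
+            # Wrap each output type so the outputs of the implicit outer
+            # workflow mirror the split (and any combined) structure of the task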
+ def wrap_type(tp): + tp = state.nest_output_type(tp) + tp = state.combine_state_arrays(tp) + return tp - """ - if runnable.plugin and runnable.plugin != self.plugin: - raise NotImplementedError() + output_types = { + o.name: wrap_type(o.type) for o in list_fields(task_def.Outputs) + } - futures = set() - if runnable.state is None: - raise Exception("Only runnables with state should reach here") + @workflow.define(outputs=output_types) + def Split( + defn: TaskDef, output_types: dict, environment: Environment | None + ): + node = workflow.add(defn, environment=environment, hooks=hooks) + return tuple(getattr(node, o) for o in output_types) - task_pkl = await prepare_runnable_with_state(runnable) + task_def = Split( + defn=task_def, output_types=output_types, environment=self.environment + ) - for sidx in range(len(runnable.state.states_val)): - if is_workflow(runnable): - # job has no state anymore - futures.add( - # This unpickles and runs workflow - why are we pickling? - asyncio.create_task(load_and_run_async(task_pkl, sidx, self, rerun)) - ) + environment = None + elif task_def._combiner: + raise ValueError( + f"Task {self} is marked for combining, but not splitting. " + "Use the `split` method to split the task before combining." + ) + else: + environment = self.environment + + task = Task( + task_def, + submitter=self, + name="main", + environment=environment, + hooks=hooks, + ) + try: + self.run_start_time = datetime.now() + if self.worker.is_async: # Only workflow tasks can be async + self.loop.run_until_complete(self.worker.run_async(task, rerun=rerun)) else: - futures.add(self.worker.run_el((sidx, task_pkl, runnable), rerun=rerun)) - - if wait and futures: - # if wait is True, we are at the end of the graph / state expansion. - # Once the remaining jobs end, we will exit `submit_from_call` - await asyncio.gather(*futures) - return - # pass along futures to be awaited independently - return futures + self.worker.run(task, rerun=rerun) + except Exception as e: + msg = ( + f"Full crash report for {type(task_def).__name__!r} task is here: " + + str(task.output_dir / "_error.pklz") + ) + if raise_errors: + e.add_note(msg) + raise e + else: + logger.error("\nTask execution failed\n%s", msg) + finally: + self.run_start_time = None + PersistentCache().clean_up() + result = task.result() + if result is None: + if task.lockfile.exists(): + raise RuntimeError( + f"Task {task} has a lockfile, but no result was found. " + "This may be due to another submission that is currently running, or the hard " + "interrupt (e.g. a debugging abortion) interrupting a previous run. " + f"In the case of an interrupted run, please remove {str(task.lockfile)!r} " + "and resubmit." + ) + raise RuntimeError(f"Task {task} has no result in {str(task.output_dir)!r}") + return result + + def __getstate__(self): + state = self.__dict__.copy() + # Remove the unpicklable entries or those that should not be pickled + # When unpickled (in another process) the submitter can't be called + state["loop"] = None + state["_worker"] = None + return state + + def __setstate__(self, state): + self.__dict__.update(state) + # Restore the loop and worker + self.loop = get_open_loop() + self._worker = WORKERS[self.worker_name](**self.worker_kwargs) + self.worker.loop = self.loop - async def expand_workflow(self, wf, rerun=False): - """ - Expand and execute a stateless :class:`~pydra.engine.core.Workflow`. - This method is only reached by `Workflow._run_task`. 
+ def expand_workflow(self, workflow_task: "Task[WorkflowDef]", rerun: bool) -> None: + """Expands and executes a workflow task synchronously. Typically only used during + debugging and testing, as the asynchronous version is more efficient. Parameters ---------- - wf : :obj:`~pydra.engine.core.Workflow` + task : :obj:`~pydra.engine.core.Task[WorkflowDef]` Workflow Task object - Returns - ------- - wf : :obj:`pydra.engine.core.Workflow` - The computed workflow + """ + # Construct the workflow + wf = workflow_task.definition.construct() + # Generate the execution graph + exec_graph = wf.execution_graph(submitter=self) + tasks = self.get_runnable_tasks(exec_graph) + while tasks or any(not n.done for n in exec_graph.nodes): + for task in tasks: + self.worker.run(task, rerun=rerun) + tasks = self.get_runnable_tasks(exec_graph) + workflow_task.return_values = {"workflow": wf, "exec_graph": exec_graph} + async def expand_workflow_async( + self, workflow_task: "Task[WorkflowDef]", rerun: bool + ) -> None: """ - # creating a copy of the graph that will be modified - # the copy contains new lists with original runnable objects - graph_copy = wf.graph.copy() - # resetting uid for nodes in the copied workflows - for nd in graph_copy.nodes: - nd._uid = uuid4().hex + Expand and execute a workflow task asynchronously. + + Parameters + ---------- + task : :obj:`~pydra.engine.core.Task[WorkflowDef]` + Workflow Task object + """ + wf = workflow_task.definition.construct() + # Generate the execution graph + exec_graph = wf.execution_graph(submitter=self) # keep track of pending futures task_futures = set() - tasks, tasks_follow_errored = get_runnable_tasks(graph_copy) - while tasks or task_futures or graph_copy.nodes: + tasks = self.get_runnable_tasks(exec_graph) + while tasks or task_futures or any(not n.done for n in exec_graph.nodes): if not tasks and not task_futures: # it's possible that task_futures is empty, but not able to get any # tasks from graph_copy (using get_runnable_tasks) # this might be related to some delays saving the files - # so try to get_runnable_tasks for another minut + # so try to get_runnable_tasks for another minute ii = 0 - while not tasks and graph_copy.nodes: - tasks, follow_err = get_runnable_tasks(graph_copy) + while not tasks and any(not n.done for n in exec_graph.nodes): + tasks = self.get_runnable_tasks(exec_graph) ii += 1 # don't block the event loop! await asyncio.sleep(1) - if ii > 60: + if ii > 10: + not_done = "\n".join( + ( + f"{n.name}: started={bool(n.started)}, " + f"blocked={list(n.blocked)}, queued={list(n.queued)}" + ) + for n in exec_graph.nodes + if not n.done + ) msg = ( - f"Graph of '{wf}' workflow is not empty, but not able to get " - "more tasks - something has gone wrong when retrieving the " - "results predecessors:\n\n" + "Something has gone wrong when retrieving the predecessor " + f"results. 
Not able to get any more tasks but he following " + f"nodes of the {wf.name!r} workflow are not done:\n{not_done}\n\n" + ) + not_done = [n for n in exec_graph.nodes if not n.done] + msg += "\n" + ", ".join( + f"{t.name}: {t.done}" for t in not_done[0].queued.values() ) - # Get blocked tasks and the predecessors they are waiting on - outstanding = { + # Get blocked tasks and the predecessors they are blocked on + outstanding: dict[Task[DefType], list[Task[DefType]]] = { t: [ - p for p in graph_copy.predecessors[t.name] if not p.done + p for p in exec_graph.predecessors[t.name] if not p.done ] - for t in graph_copy.sorted_nodes + for t in exec_graph.sorted_nodes } hashes_have_changed = False - for task, waiting_on in outstanding.items(): - if not waiting_on: + for task, blocked_on in outstanding.items(): + if not blocked_on: continue msg += f"- '{task.name}' node blocked due to\n" - for pred in waiting_on: + for pred in blocked_on: if ( pred.checksum != wf.inputs._graph_checksums[pred.name] @@ -234,29 +412,13 @@ async def expand_workflow(self, wf, rerun=False): ) raise RuntimeError(msg) for task in tasks: - # grab inputs if needed - logger.debug(f"Retrieving inputs for {task}") - # TODO: add state idx to retrieve values to reduce waiting - task.inputs.retrieve_values(wf) - if task.state: - for fut in await self.expand_runnable(task, rerun=rerun): - task_futures.add(fut) - # expand that workflow - elif is_workflow(task): - await task._run(self, rerun=rerun) - # single task + if task.is_async: + await self.worker.run_async(task, rerun=rerun) else: - task_futures.add(self.worker.run_el(task, rerun=rerun)) + task_futures.add(self.worker.run(task, rerun=rerun)) task_futures = await self.worker.fetch_finished(task_futures) - tasks, follow_err = get_runnable_tasks(graph_copy) - # updating tasks_errored - for key, val in follow_err.items(): - tasks_follow_errored.setdefault(key, []) - tasks_follow_errored[key] += val - - for key, val in tasks_follow_errored.items(): - setattr(getattr(wf, key), "_errored", val) - return wf + tasks = self.get_runnable_tasks(exec_graph) + workflow_task.return_values = {"workflow": wf, "exec_graph": exec_graph} def __enter__(self): return self @@ -268,80 +430,380 @@ def close(self): """ Close submitter. - Do not close previously running loop. + Do not close previously queued loop. 
""" self.worker.close() if self._own_loop: self.loop.close() + def _check_locks(self, tasks: list[Task]) -> None: + """Check for stale lock files and remove them.""" + if self.clean_stale_locks: + for task in tasks: + start_time = task.run_start_time + if start_time and start_time < self.run_start_time: + task.lockfile.unlink() -def get_runnable_tasks(graph): - """Parse a graph and return all runnable tasks.""" - tasks = [] - to_remove = [] - # tasks that follow task that raises an error - following_err = dict() - for tsk in graph.sorted_nodes: - if tsk not in graph.sorted_nodes: - continue - # since the list is sorted (breadth-first) we can stop - # when we find a task that depends on any task that is already in tasks - if set(graph.predecessors[tsk.name]).intersection(set(tasks)): - break - _is_runnable = is_runnable(graph, tsk) - if _is_runnable is True: - tasks.append(tsk) - to_remove.append(tsk) - elif _is_runnable is False: - continue - else: # a previous task had an error - errored_task = _is_runnable - # removing all successors of the errored task - for task_err in errored_task: - task_to_remove = graph.remove_successors_nodes(task_err) - for tsk in task_to_remove: - # adding tasks that were removed from the graph - # due to the error in the errored_task - following_err.setdefault(tsk, []) - following_err[tsk].append(task_err.name) - - # removing tasks that are ready to run from the graph - for nd in to_remove: - graph.remove_nodes(nd) - return tasks, following_err - - -def is_runnable(graph, obj): - """Check if a task within a graph is runnable.""" - connections_to_remove = [] - pred_errored = [] - is_done = None - for pred in graph.predecessors[obj.name]: - try: - is_done = pred.done - except ValueError: - pred_errored.append(pred) + def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: + """Parse a graph and return all runnable tasks. 
+ + Parameters + ---------- + graph : :obj:`~pydra.engine.graph.DiGraph` + Graph object + + Returns + ------- + tasks : list of :obj:`~pydra.engine.core.Task` + List of runnable tasks + following_err : dict[NodeToExecute, list[str]] + Dictionary of tasks that are blocked by errored tasks + """ + tasks = [] + not_started = set() + node: NodeExecution + for node in graph.sorted_nodes: + if node.done: + continue + # since the list is sorted (breadth-first) we can stop + # when we find a task that depends on any task that is already in tasks + if set(graph.predecessors[node.name]).intersection(not_started): + break + # Record if the node has not been started + if not node.started: + not_started.add(node) + tasks.extend(node.get_runnable_tasks(graph)) + self._check_locks(tasks) + return tasks + + @property + def cache_dir(self): + """Get the location of the cache directory.""" + return self._cache_dir + + @cache_dir.setter + def cache_dir(self, location): + if location is not None: + self._cache_dir = Path(location).resolve() + self._cache_dir.mkdir(parents=False, exist_ok=True) + else: + self._cache_dir = mkdtemp() + self._cache_dir = Path(self._cache_dir).resolve() + + +class NodeExecution(ty.Generic[DefType]): + """A wrapper around a workflow node containing the execution state of the tasks that + are generated from it""" + + name: str + node: "Node" + submitter: Submitter + + # List of tasks that were completed successfully + successful: dict[int, list["Task[DefType]"]] + # List of tasks that failed + errored: dict[int, "Task[DefType]"] + # List of tasks that couldn't be run due to upstream errors + unrunnable: dict[int, list["Task[DefType]"]] + # List of tasks that are queued + queued: dict[int, "Task[DefType]"] + # List of tasks that are queued + running: dict[int, tuple["Task[DefType]", datetime]] + # List of tasks that are blocked on other tasks to complete before they can be run + blocked: dict[int, "Task[DefType]"] | None + + _tasks: dict[int | None, "Task[DefType]"] | None + + workflow: "Workflow" + + graph: DiGraph["NodeExecution"] | None + + def __init__( + self, + node: "Node", + submitter: Submitter, + workflow: "Workflow", + ): + self.name = node.name + self.node = node + self.submitter = submitter + # Initialize the state dictionaries + self._tasks = None + self.blocked = None + self.successful = {} + self.errored = {} + self.queued = {} + self.running = {} # Not used in logic, but may be useful for progress tracking + self.unrunnable = defaultdict(list) + # Prepare the state to be run + if node.state: + self.state = node.state + self.state.prepare_states(self.node.state_values) + self.state.prepare_inputs() + else: + self.state = None + self.state_names = self.node.state.names if self.node.state else [] + self.workflow = workflow + self.graph = None + + def __repr__(self): + return ( + f"NodeExecution(name={self.name!r}, blocked={list(self.blocked)}, " + f"queued={list(self.queued)}, running={list(self.running)}, " + f"successful={list(self.successful)}, errored={list(self.errored)}, " + f"unrunnable={list(self.unrunnable)})" + ) + + @property + def inputs(self) -> "Node.Inputs": + return self.node.inputs + + @property + def _definition(self) -> "Node": + return self.node._definition - if is_done is True: - connections_to_remove.append(pred) - elif is_done is False: + @property + def tasks(self) -> ty.Generator["Task[DefType]", None, None]: + if self._tasks is None: + self._tasks = {t.state_index: t for t in self._generate_tasks()} + return self._tasks.values() + + def 
get_jobs(self, final_index: int | None = None) -> "Task | StateArray[Task]": + """Get the jobs that match a given state index. + + Parameters + ---------- + final_index : int, optional + The index of the output state array (i.e. after any combinations) of the + job to get, by default None + + Returns + ------- + matching : Task | StateArray[Task] + The task or tasks that match the given index + """ + if not self.tasks: # No jobs, return empty state array + return StateArray() + if not self.node.state: # Return the singular job + assert final_index is None + task = self._tasks[None] + return task + if final_index is None: # return all jobs in a state array + return StateArray(self._tasks.values()) + if not self.node.state.combiner: # Select the job that matches the index + task = self._tasks[final_index] + return task + # Get a slice of the tasks that match the given index of the state array of the + # combined values + final_index = set(self.node.state.states_ind_final[final_index].items()) + return StateArray( + self._tasks[i] + for i, ind in enumerate(self.node.state.states_ind) + if set(ind.items()).issuperset(final_index) + ) + + @property + def started(self) -> bool: + return ( + self.successful + or self.errored + or self.unrunnable + or self.queued + or self.blocked is not None + ) + + @property + def done(self) -> bool: + self.update_status() + if not self.started: return False + # Check to see if any previously queued tasks have completed + return not (self.queued or self.blocked or self.running) + + def update_status(self) -> None: + """Updates the status of the tasks in the node.""" + if not self.started: + return + # Check to see if any previously queued tasks have completed + for index, task in list(self.queued.items()): + try: + is_done = task.done + except ValueError: + errored = True + is_done = False + else: + errored = False + if is_done: + self.successful[task.state_index] = self.queued.pop(index) + elif task.errored or errored: + self.errored[task.state_index] = self.queued.pop(index) + elif task.run_start_time: + self.running[task.state_index] = ( + self.queued.pop(index), + task.run_start_time, + ) + # Check to see if any previously running tasks have completed + for index, (task, start_time) in list(self.running.items()): + if task.done: + self.successful[task.state_index] = self.running.pop(index)[0] + elif task.errored: + self.errored[task.state_index] = self.running.pop(index)[0] + + @property + def all_failed(self) -> bool: + return (self.unrunnable or self.errored) and not ( + self.successful or self.blocked or self.queued + ) + + def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: + if not self.node.state: + yield Task( + definition=self._resolve_lazy_inputs(task_def=self.node._definition), + submitter=self.submitter, + environment=self.node._environment, + hooks=self.node._hooks, + name=self.node.name, + ) + else: + for index, split_defn in enumerate(self._split_definition()): + yield Task( + definition=split_defn, + submitter=self.submitter, + environment=self.node._environment, + name=self.node.name, + hooks=self.node._hooks, + state_index=index, + ) + + def _resolve_lazy_inputs( + self, + task_def: "TaskDef", + state_index: int | None = None, + ) -> "TaskDef": + """Resolves lazy fields in the task definition by replacing them with their + actual values calculated by upstream jobs. 
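+
+        Only fields whose current value is a lazy field are replaced; all other
+        field values are left untouched.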
+ + Parameters + ---------- + task_def : TaskDef + The definition to resolve the lazy fields of + state_index : int, optional + The state index for the workflow, by default None + + Returns + ------- + TaskDef + The task definition with all lazy fields resolved + """ + resolved = {} + for name, value in attrs_values(task_def).items(): + if isinstance(value, LazyField): + resolved[name] = value._get_value( + workflow=self.workflow, graph=self.graph, state_index=state_index + ) + return attrs.evolve(task_def, **resolved) + + def _split_definition(self) -> dict[int, "TaskDef[OutputType]"]: + """Split the definition into the different states it will be run over + + Parameters + ---------- + values : dict[str, Any] + The values to use for the split + """ + # TODO: doesn't work properly for more cmplicated wf (check if still an issue) + if not self.node.state: + return {None: self.node._definition} + split_defs = [] + for input_ind in self.node.state.inputs_ind: + resolved = {} + for inpt_name in set(self.node.input_names): + value = getattr(self._definition, inpt_name) + state_key = f"{self.node.name}.{inpt_name}" + if isinstance(value, LazyField): + resolved[inpt_name] = value._get_value( + workflow=self.workflow, + graph=self.graph, + state_index=input_ind.get(state_key), + ) + elif state_key in input_ind: + resolved[inpt_name] = self.node.state._get_element( + value=value, + field_name=inpt_name, + ind=input_ind[state_key], + ) + split_defs.append(attrs.evolve(self.node._definition, **resolved)) + return split_defs + + def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: + """For a given node, check to see which tasks have been successfully run, are ready + to run, can't be run due to upstream errors, or are blocked on other tasks to complete. 
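+
+        Tasks whose predecessors have all completed successfully are moved from
+        ``blocked`` to ``queued`` and returned, while tasks with a predecessor
+        that errored or was itself unrunnable are recorded in ``unrunnable``.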
- if pred_errored: - return pred_errored + Parameters + ---------- + node : :obj:`~pydra.engine.node.Node` + The node object to get the tasks for + graph : :obj:`~pydra.engine.graph.DiGraph` + Graph object - # removing nodes that are done from connections - for nd in connections_to_remove: - graph.remove_nodes_connections(nd) - return True + Returns + ------- + runnable : list[NodeExecution] + List of tasks that are ready to run + """ + runnable: list["Task[DefType]"] = [] + self.tasks # Ensure tasks are loaded + if not self.started: + assert self._tasks is not None + self.blocked = copy(self._tasks) + # Check to see if any blocked tasks are now runnable/unrunnable + for index, task in list(self.blocked.items()): + pred: NodeExecution + is_runnable = True + states_ind = ( + list(self.node.state.states_ind[index].items()) + if self.node.state + else [] + ) + for pred in graph.predecessors[self.node.name]: + if pred.node.state: + pred_states_ind = { + (k, i) for k, i in states_ind if k.startswith(pred.name + ".") + } + pred_inds = [ + i + for i, ind in enumerate(pred.node.state.states_ind) + if set(ind.items()).issuperset(pred_states_ind) + ] + else: + pred_inds = [None] + if not all(i in pred.successful for i in pred_inds): + is_runnable = False + blocked = True + if pred_errored := [i for i in pred_inds if i in pred.errored]: + self.unrunnable[index].extend( + [pred.errored[i] for i in pred_errored] + ) + blocked = False + if pred_unrunnable := [ + i for i in pred_inds if i in pred.unrunnable + ]: + self.unrunnable[index].extend( + [pred.unrunnable[i] for i in pred_unrunnable] + ) + blocked = False + if not blocked: + del self.blocked[index] + break + if is_runnable: + runnable.append(self.blocked.pop(index)) + self.queued.update({t.state_index: t for t in runnable}) + return list(self.queued.values()) -async def prepare_runnable_with_state(runnable): - runnable.state.prepare_states(runnable.inputs, cont_dim=runnable.cont_dim) - runnable.state.prepare_inputs() - logger.debug(f"Expanding {runnable} into {len(runnable.state.states_val)} states") +async def prepare_runnable(runnable): return runnable.pickle_task() diff --git a/pydra/engine/task.py b/pydra/engine/task.py index cb55d9e390..c60ff23b15 100644 --- a/pydra/engine/task.py +++ b/pydra/engine/task.py @@ -41,166 +41,34 @@ from __future__ import annotations -import platform -import re -import attr -import inspect -import typing as ty -import shlex +import attrs +import json from pathlib import Path -import warnings -import cloudpickle as cp -from fileformats.core import FileSet, DataType -from .core import TaskBase, is_lazy -from ..utils.messenger import AuditFlag +from .core import Task +from pydra.utils.messenger import AuditFlag from .specs import ( - BaseSpec, - SpecInfo, - ShellSpec, - ShellOutSpec, - attr_fields, + PythonDef, + ShellDef, + attrs_fields, ) from .helpers import ( - ensure_list, - position_sort, - argstr_formatting, - output_from_inputfields, - parse_copyfile, + attrs_values, ) -from .helpers_file import template_update -from ..utils.typing import TypeParser +from pydra.engine.helpers_file import is_local_file from .environments import Native -class FunctionTask(TaskBase): +class PythonTask(Task): """Wrap a Python callable as a task element.""" - def __init__( - self, - func: ty.Callable, - audit_flags: AuditFlag = AuditFlag.NONE, - cache_dir=None, - cache_locations=None, - input_spec: ty.Optional[ty.Union[SpecInfo, BaseSpec]] = None, - cont_dim=None, - messenger_args=None, - messengers=None, - name=None, - 
output_spec: ty.Optional[ty.Union[SpecInfo, BaseSpec]] = None, - rerun=False, - **kwargs, - ): - """ - Initialize this task. - - Parameters - ---------- - func : :obj:`callable` - A Python executable function. - audit_flags : :obj:`pydra.utils.messenger.AuditFlag` - Auditing configuration - cache_dir : :obj:`os.pathlike` - Cache directory - cache_locations : :obj:`list` of :obj:`os.pathlike` - List of alternative cache locations. - input_spec : :obj:`pydra.engine.specs.SpecInfo` - Specification of inputs. - cont_dim : :obj:`dict`, or `None` - Container dimensions for input fields, - if any of the container should be treated as a container - messenger_args : - TODO - messengers : - TODO - name : :obj:`str` - Name of this task. - output_spec : :obj:`pydra.engine.specs.BaseSpec` - Specification of inputs. - - """ - if input_spec is None: - fields = [] - for val in inspect.signature(func).parameters.values(): - if val.default is not inspect.Signature.empty: - val_dflt = val.default - else: - val_dflt = attr.NOTHING - if isinstance(val.annotation, ty.TypeVar): - raise NotImplementedError( - "Template types are not currently supported in task signatures " - f"(found in '{val.name}' field of '{name}' task), " - "see https://github.com/nipype/pydra/issues/672" - ) - fields.append( - ( - val.name, - attr.ib( - default=val_dflt, - type=val.annotation, - metadata={ - "help_string": f"{val.name} parameter from {func.__name__}" - }, - ), - ) - ) - fields.append(("_func", attr.ib(default=cp.dumps(func), type=bytes))) - input_spec = SpecInfo(name="Inputs", fields=fields, bases=(BaseSpec,)) - else: - input_spec.fields.append( - ("_func", attr.ib(default=cp.dumps(func), type=bytes)) - ) - self.input_spec = input_spec - if name is None: - name = func.__name__ - super().__init__( - name, - inputs=kwargs, - cont_dim=cont_dim, - audit_flags=audit_flags, - messengers=messengers, - messenger_args=messenger_args, - cache_dir=cache_dir, - cache_locations=cache_locations, - rerun=rerun, - ) - if output_spec is None: - name = "Output" - fields = [("out", ty.Any)] - if "return" in func.__annotations__: - return_info = func.__annotations__["return"] - # # e.g. python annotation: fun() -> ty.NamedTuple("Output", [("out", float)]) - # # or pydra decorator: @pydra.mark.annotate({"return": ty.NamedTuple(...)}) - # - - if ( - hasattr(return_info, "__name__") - and getattr(return_info, "__annotations__", None) - and not issubclass(return_info, DataType) - ): - name = return_info.__name__ - fields = list(return_info.__annotations__.items()) - # e.g. python annotation: fun() -> {"out": int} - # or pydra decorator: @pydra.mark.annotate({"return": {"out": int}}) - elif isinstance(return_info, dict): - fields = list(return_info.items()) - # e.g. python annotation: fun() -> (int, int) - # or pydra decorator: @pydra.mark.annotate({"return": (int, int)}) - elif isinstance(return_info, tuple): - fields = [(f"out{i}", t) for i, t in enumerate(return_info, 1)] - # e.g. 
python annotation: fun() -> int - # or pydra decorator: @pydra.mark.annotate({"return": int}) - else: - fields = [("out", return_info)] - output_spec = SpecInfo(name=name, fields=fields, bases=(BaseSpec,)) - - self.output_spec = output_spec + definition: PythonDef def _run_task(self, environment=None): - inputs = attr.asdict(self.inputs, recurse=False) - del inputs["_func"] + inputs = attrs_values(self.definition) + del inputs["function"] self.output_ = None - output = cp.loads(self.inputs._func)(**inputs) - output_names = [el[0] for el in self.output_spec.fields] + output = self.definition.function(**inputs) + output_names = [f.name for f in attrs.fields(self.definition.Outputs)] if output is None: self.output_ = {nm: None for nm in output_names} elif len(output_names) == 1: @@ -217,22 +85,20 @@ def _run_task(self, environment=None): ) -class ShellCommandTask(TaskBase): +class ShellTask(Task): """Wrap a shell command as a task element.""" - input_spec = None - output_spec = None + definition: ShellDef def __init__( self, + definition: ShellDef, audit_flags: AuditFlag = AuditFlag.NONE, cache_dir=None, - input_spec: ty.Optional[SpecInfo] = None, cont_dim=None, messenger_args=None, messengers=None, name=None, - output_spec: ty.Optional[SpecInfo] = None, rerun=False, strip=False, environment=Native(), @@ -258,41 +124,16 @@ def __init__( TODO name : :obj:`str` Name of this task. - output_spec : :obj:`pydra.engine.specs.BaseSpec` + output_spec : :obj:`pydra.engine.specs.BaseDef` Specification of inputs. strip : :obj:`bool` TODO - """ - - # using default name for task if no name provided - if name is None: - name = "ShellTask_noname" - - # using provided spec, class attribute or setting the default SpecInfo - self.input_spec = ( - input_spec - or self.input_spec - or SpecInfo(name="Inputs", fields=[], bases=(ShellSpec,)) - ) - self.output_spec = ( - output_spec - or self.output_spec - or SpecInfo(name="Output", fields=[], bases=(ShellOutSpec,)) - ) - self.output_spec = output_from_inputfields(self.output_spec, self.input_spec) - - for special_inp in ["executable", "args"]: - if hasattr(self, special_inp): - if special_inp not in kwargs: - kwargs[special_inp] = getattr(self, special_inp) - elif kwargs[special_inp] != getattr(self, special_inp): - warnings.warn( - f"you are changing the executable from {getattr(self, special_inp)} " - f"to {kwargs[special_inp]}" - ) - + self.return_code = None + self.stdout = None + self.stderr = None super().__init__( + definition=definition, name=name, inputs=kwargs, cont_dim=cont_dim, @@ -307,286 +148,41 @@ def __init__( self.bindings = {} self.inputs_mod_root = {} - def get_bindings(self, root: str | None = None) -> dict[str, tuple[str, str]]: - """Return bindings necessary to run task in an alternative root. - - This is primarily intended for contexts when a task is going - to be run in a container with mounted volumes. 
- - Arguments - --------- - root: str - - Returns - ------- - bindings: dict - Mapping from paths in the host environment to the target environment - """ - - if root is None: - return {} - else: - self._prepare_bindings(root=root) - return self.bindings - - def command_args(self, root=None): - """Get command line arguments""" - if is_lazy(self.inputs): - raise Exception("can't return cmdline, self.inputs has LazyFields") - if self.state: - raise NotImplementedError - - modified_inputs = template_update(self.inputs, output_dir=self.output_dir) - for field_name, field_value in modified_inputs.items(): - setattr(self.inputs, field_name, field_value) - - pos_args = [] # list for (position, command arg) - self._positions_provided = [] - for field in attr_fields(self.inputs): - name, meta = field.name, field.metadata - if ( - getattr(self.inputs, name) is attr.NOTHING - and not meta.get("readonly") - and not meta.get("formatter") - ): - continue - if name == "executable": - pos_args.append(self._command_shelltask_executable(field)) - elif name == "args": - pos_val = self._command_shelltask_args(field) - if pos_val: - pos_args.append(pos_val) - else: - if name in modified_inputs: - pos_val = self._command_pos_args(field, root=root) - else: - pos_val = self._command_pos_args(field) - if pos_val: - pos_args.append(pos_val) - - # Sort command and arguments by position - cmd_args = position_sort(pos_args) - # pos_args values are each a list of arguments, so concatenate lists after sorting - return sum(cmd_args, []) - - def _field_value(self, field, check_file=False): - """ - Checking value of the specific field, if value is not set, None is returned. - check_file has no effect, but subclasses can use it to validate or modify - filenames. - """ - value = getattr(self.inputs, field.name) - if value == attr.NOTHING: - value = None - return value - - def _command_shelltask_executable(self, field): - """Returning position and value for executable ShellTask input""" - pos = 0 # executable should be the first el. of the command - value = self._field_value(field) - if value is None: - raise ValueError("executable has to be set") - return pos, ensure_list(value, tuple2list=True) - def _command_shelltask_args(self, field): - """Returning position and value for args ShellTask input""" - pos = -1 # assuming that args is the last el. of the command - value = self._field_value(field, check_file=True) - if value is None: - return None - else: - return pos, ensure_list(value, tuple2list=True) +class BoshTask(ShellDef): - def _command_pos_args(self, field, root=None): - """ - Checking all additional input fields, setting pos to None, if position not set. - Creating a list with additional parts of the command that comes from - the specific field. - """ - argstr = field.metadata.get("argstr", None) - formatter = field.metadata.get("formatter", None) - if argstr is None and formatter is None: - # assuming that input that has no argstr is not used in the command, - # or a formatter is not provided too. 
- return None - pos = field.metadata.get("position", None) - if pos is not None: - if not isinstance(pos, int): - raise Exception(f"position should be an integer, but {pos} given") - # checking if the position is not already used - if pos in self._positions_provided: - raise Exception( - f"{field.name} can't have provided position, {pos} is already used" - ) - - self._positions_provided.append(pos) - - # Shift non-negatives up to allow executable to be 0 - # Shift negatives down to allow args to be -1 - pos += 1 if pos >= 0 else -1 - - value = self._field_value(field, check_file=True) - - if value: - if field.name in self.inputs_mod_root: - value = self.inputs_mod_root[field.name] - elif root: # values from templates - value = value.replace(str(self.output_dir), f"{root}{self.output_dir}") - - if field.metadata.get("readonly", False) and value is not None: - raise Exception(f"{field.name} is read only, the value can't be provided") - elif ( - value is None - and not field.metadata.get("readonly", False) - and formatter is None - ): - return None - - inputs_dict = attr.asdict(self.inputs, recurse=False) - - cmd_add = [] - # formatter that creates a custom command argument - # it can take the value of the field, all inputs, or the value of other fields. - if "formatter" in field.metadata: - call_args = inspect.getfullargspec(field.metadata["formatter"]) - call_args_val = {} - for argnm in call_args.args: - if argnm == "field": - call_args_val[argnm] = value - elif argnm == "inputs": - call_args_val[argnm] = inputs_dict - else: - if argnm in inputs_dict: - call_args_val[argnm] = inputs_dict[argnm] - else: - raise AttributeError( - f"arguments of the formatter function from {field.name} " - f"has to be in inputs or be field or output_dir, " - f"but {argnm} is used" - ) - cmd_el_str = field.metadata["formatter"](**call_args_val) - cmd_el_str = cmd_el_str.strip().replace(" ", " ") - if cmd_el_str != "": - cmd_add += split_cmd(cmd_el_str) - elif field.type is bool: - # if value is simply True the original argstr is used, - # if False, nothing is added to the command. - if value is True: - cmd_add.append(argstr) - else: - sep = field.metadata.get("sep", " ") - if ( - argstr.endswith("...") - and isinstance(value, ty.Iterable) - and not isinstance(value, (str, bytes)) - ): - argstr = argstr.replace("...", "") - # if argstr has a more complex form, with "{input_field}" - if "{" in argstr and "}" in argstr: - argstr_formatted_l = [] - for val in value: - argstr_f = argstr_formatting( - argstr, self.inputs, value_updates={field.name: val} - ) - argstr_formatted_l.append(f" {argstr_f}") - cmd_el_str = sep.join(argstr_formatted_l) - else: # argstr has a simple form, e.g. "-f", or "--f" - cmd_el_str = sep.join([f" {argstr} {val}" for val in value]) + def _command_args_single(self, state_ind=None, index=None): + """Get command line arguments for a single state""" + input_filepath = self._bosh_invocation_file(state_ind=state_ind, index=index) + cmd_list = ( + self.definition.executable + + [str(self.bosh_file), input_filepath] + + self.definition.args + + self.bindings + ) + return cmd_list + + def _bosh_invocation_file(self, state_ind=None, index=None): + """creating bosh invocation file - json file with inputs values""" + input_json = {} + for f in attrs_fields(self.definition, exclude_names=("executable", "args")): + if self.state and f"{self.name}.{f.name}" in state_ind: + value = getattr(self.definition, f.name)[ + state_ind[f"{self.name}.{f.name}"] + ] else: - # in case there are ... 
when input is not a list - argstr = argstr.replace("...", "") - if isinstance(value, ty.Iterable) and not isinstance( - value, (str, bytes) - ): - cmd_el_str = sep.join([str(val) for val in value]) - value = cmd_el_str - # if argstr has a more complex form, with "{input_field}" - if "{" in argstr and "}" in argstr: - cmd_el_str = argstr.replace(f"{{{field.name}}}", str(value)) - cmd_el_str = argstr_formatting(cmd_el_str, self.inputs) - else: # argstr has a simple form, e.g. "-f", or "--f" - if value: - cmd_el_str = f"{argstr} {value}" - else: - cmd_el_str = "" - if cmd_el_str: - cmd_add += split_cmd(cmd_el_str) - return pos, cmd_add + value = getattr(self.definition, f.name) + # adding to the json file if specified by the user + if value is not attrs.NOTHING and value != "NOTHING": + if is_local_file(f): + value = Path(value) + self.bindings.extend(["-v", f"{value.parent}:{value.parent}:ro"]) + value = str(value) - @property - def cmdline(self): - """Get the actual command line that will be submitted - Returns a list if the task has a state. - """ - if is_lazy(self.inputs): - raise Exception("can't return cmdline, self.inputs has LazyFields") - # checking the inputs fields before returning the command line - self.inputs.check_fields_input_spec() - if self.state: - raise NotImplementedError - # Skip the executable, which can be a multi-part command, e.g. 'docker run'. - cmdline = self.command_args()[0] - for arg in self.command_args()[1:]: - # If there are spaces in the arg, and it is not enclosed by matching - # quotes, add quotes to escape the space. Not sure if this should - # be expanded to include other special characters apart from spaces - if " " in arg: - cmdline += " '" + arg + "'" - else: - cmdline += " " + arg - return cmdline + input_json[f.name] = value - def _run_task(self, environment=None): - if environment is None: - environment = self.environment - self.output_ = environment.execute(self) + filename = self.cache_dir / f"{self.name}-{index}.json" + with open(filename, "w") as jsonfile: + json.dump(input_json, jsonfile) - def _prepare_bindings(self, root: str): - """Prepare input files to be passed to the task - - This updates the ``bindings`` attribute of the current task to make files available - in an ``Environment``-defined ``root``. 
- """ - for fld in attr_fields(self.inputs): - if TypeParser.contains_type(FileSet, fld.type): - fileset = getattr(self.inputs, fld.name) - copy = parse_copyfile(fld)[0] == FileSet.CopyMode.copy - - host_path, env_path = fileset.parent, Path(f"{root}{fileset.parent}") - - # Default to mounting paths as read-only, but respect existing modes - old_mode = self.bindings.get(host_path, ("", "ro"))[1] - self.bindings[host_path] = (env_path, "rw" if copy else old_mode) - - # Provide in-container paths without type-checking - self.inputs_mod_root[fld.name] = tuple( - env_path / rel for rel in fileset.relative_fspaths - ) - - DEFAULT_COPY_COLLATION = FileSet.CopyCollation.adjacent - - -def split_cmd(cmd: str): - """Splits a shell command line into separate arguments respecting quotes - - Parameters - ---------- - cmd : str - Command line string or part thereof - - Returns - ------- - str - the command line string split into process args - """ - # Check whether running on posix or Windows system - on_posix = platform.system() != "Windows" - args = shlex.split(cmd, posix=on_posix) - cmd_args = [] - for arg in args: - match = re.match("(['\"])(.*)\\1$", arg) - if match: - cmd_args.append(match.group(2)) - else: - cmd_args.append(arg) - return cmd_args + return str(filename) diff --git a/pydra/engine/tests/conftest.py b/pydra/engine/tests/conftest.py index b7ecfbb8e9..642944cf5c 100644 --- a/pydra/engine/tests/conftest.py +++ b/pydra/engine/tests/conftest.py @@ -1,3 +1,4 @@ +from pathlib import Path import pytest @@ -8,9 +9,9 @@ @pytest.fixture(scope="package") -def data_tests_dir(): - test_nii = importlib_resources.files("pydra").joinpath( +def data_tests_dir() -> Path: + data_dir = importlib_resources.files("pydra").joinpath( "engine", "tests", "data_tests" ) - with importlib_resources.as_file(test_nii) as path: + with importlib_resources.as_file(data_dir) as path: yield path diff --git a/pydra/engine/tests/test_boutiques.py b/pydra/engine/tests/test_boutiques.py index 48f484b687..79652a6d58 100644 --- a/pydra/engine/tests/test_boutiques.py +++ b/pydra/engine/tests/test_boutiques.py @@ -2,12 +2,9 @@ import subprocess as sp import attr import pytest - -from ..core import Workflow -from ..task import ShellCommandTask -from ..submitter import Submitter -from ..boutiques import BoshTask -from .utils import result_no_submitter, result_submitter, no_win +from pydra.engine.helpers import attrs_values +from .utils import run_no_submitter, run_submitter, no_win +from pydra.design import workflow, boutiques, shell need_bosh_docker = pytest.mark.skipif( shutil.which("docker") is None @@ -25,14 +22,13 @@ @pytest.mark.parametrize( "maskfile", ["test_brain.nii.gz", "test_brain", "test_brain.nii"] ) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_boutiques_1(maskfile, plugin, results_function, tmpdir, data_tests_dir): """simple task to run fsl.bet using BoshTask""" - btask = BoshTask(name="NA", zenodo_id="1482743") - btask.inputs.infile = data_tests_dir / "test.nii.gz" - btask.inputs.maskfile = maskfile - btask.cache_dir = tmpdir - res = results_function(btask, plugin) + btask = boutiques.define(zenodo_id="1482743") + btask.infile = data_tests_dir / "test.nii.gz" + btask.maskfile = maskfile + res = btask(plugin, cache_dir=tmpdir) assert res.output.return_code == 0 @@ -47,21 +43,21 @@ def test_boutiques_1(maskfile, plugin, results_function, tmpdir, data_tests_dir) @need_bosh_docker 
@pytest.mark.flaky(reruns=3) def test_boutiques_spec_1(data_tests_dir): - """testing spec: providing input/output fields names""" - btask = BoshTask( - name="NA", + """testing definition: providing input/output fields names""" + btask = boutiques.define( zenodo_id="1482743", - infile=data_tests_dir / "test.nii.gz", - maskfile="test_brain.nii.gz", input_spec_names=["infile", "maskfile"], output_spec_names=["outfile", "out_outskin_off"], + )( + infile=data_tests_dir / "test.nii.gz", + maskfile="test_brain.nii.gz", ) assert len(btask.input_spec.fields) == 2 assert btask.input_spec.fields[0][0] == "infile" assert btask.input_spec.fields[1][0] == "maskfile" - assert hasattr(btask.inputs, "infile") - assert hasattr(btask.inputs, "maskfile") + assert hasattr(btask.definition, "infile") + assert hasattr(btask.definition, "maskfile") assert len(btask.output_spec.fields) == 2 assert btask.output_spec.fields[0][0] == "outfile" @@ -72,21 +68,20 @@ def test_boutiques_spec_1(data_tests_dir): @need_bosh_docker @pytest.mark.flaky(reruns=3) def test_boutiques_spec_2(data_tests_dir): - """testing spec: providing partial input/output fields names""" - btask = BoshTask( - name="NA", - zenodo_id="1482743", + """testing definition: providing partial input/output fields names""" + btask = boutiques.define( + zenodo_id="1482743", input_spec_names=["infile"], output_spec_names=[] + )( infile=data_tests_dir / "test.nii.gz", maskfile="test_brain.nii.gz", - input_spec_names=["infile"], - output_spec_names=[], ) - assert len(btask.input_spec.fields) == 1 - assert btask.input_spec.fields[0][0] == "infile" - assert hasattr(btask.inputs, "infile") + fields = attrs_values(btask) + assert len(fields) == 1 + assert fields[0][0] == "infile" + assert hasattr(btask.definition, "infile") # input doesn't see maskfile - assert not hasattr(btask.inputs, "maskfile") + assert not hasattr(btask.definition, "maskfile") assert len(btask.output_spec.fields) == 0 @@ -99,24 +94,20 @@ def test_boutiques_spec_2(data_tests_dir): ) def test_boutiques_wf_1(maskfile, plugin, tmpdir, infile): """wf with one task that runs fsl.bet using BoshTask""" - wf = Workflow(name="wf", input_spec=["maskfile", "infile"]) - wf.inputs.maskfile = maskfile - wf.inputs.infile = infile - wf.cache_dir = tmpdir - - wf.add( - BoshTask( - name="bet", - zenodo_id="1482743", - infile=wf.lzin.infile, - maskfile=wf.lzin.maskfile, + + @workflow.define + def Workflow(maskfile, infile): + bet = workflow.add( + boutiques.define(zenodo_id="1482743")( + infile=infile, + maskfile=maskfile, + ) ) - ) - wf.set_output([("outfile", wf.bet.lzout.outfile)]) + return bet.outfile - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) + wf = Workflow(maskfile=maskfile, infile=infile) + wf(plugin=plugin, cache_dir=tmpdir) res = wf.result() assert res.output.outfile.name == "test_brain.nii.gz" @@ -132,39 +123,27 @@ def test_boutiques_wf_1(maskfile, plugin, tmpdir, infile): ) def test_boutiques_wf_2(maskfile, plugin, tmpdir, infile): """wf with two BoshTasks (fsl.bet and fsl.stats) and one ShellTask""" - wf = Workflow(name="wf", input_spec=["maskfile", "infile"]) - wf.inputs.maskfile = maskfile - wf.inputs.infile = infile - wf.cache_dir = tmpdir - - wf.add( - BoshTask( - name="bet", - zenodo_id="1482743", - infile=wf.lzin.infile, - maskfile=wf.lzin.maskfile, + + @workflow.define(outputs=["outfile_bet", "out_stat", "out"]) + def Workflow(maskfile, infile): + + bet = workflow.add( + boutiques.define(zenodo_id="1482743")( + infile=infile, + maskfile=maskfile, + ) ) - ) - # used to be 
"3240521", but can't access anymore - wf.add( - BoshTask( - name="stat", zenodo_id="4472771", input_file=wf.bet.lzout.outfile, v=True + # used to be "3240521", but can't access anymore + stat = workflow.add( + boutiques.define(zenodo_id="4472771")( + input_file=bet.outfile, + v=True, + ) ) - ) - wf.add(ShellCommandTask(name="cat", executable="cat", args=wf.stat.lzout.output)) - - wf.set_output( - [ - ("outfile_bet", wf.bet.lzout.outfile), - ("out_stat", wf.stat.lzout.output), - ("out", wf.cat.lzout.stdout), - ] - ) + cat = workflow.add(shell.define("cat ")(file=stat.output)) + return bet.outfile, stat.output, cat.stdout - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) - - res = wf.result() + res = Workflow(maskfile=maskfile, infile=infile)(plugin=plugin, cache_dir=tmpdir) assert res.output.outfile_bet.name == "test_brain.nii.gz" assert res.output.outfile_bet.exists() diff --git a/pydra/engine/tests/test_dockertask.py b/pydra/engine/tests/test_dockertask.py index 5ccf37e292..3b57cb35a4 100644 --- a/pydra/engine/tests/test_dockertask.py +++ b/pydra/engine/tests/test_dockertask.py @@ -1,13 +1,12 @@ -import typing as ty +import attrs import pytest -import attr - -from ..task import ShellCommandTask -from ..submitter import Submitter -from ..core import Workflow -from ..specs import ShellOutSpec, SpecInfo, File, ShellSpec -from ..environments import Docker -from .utils import no_win, need_docker, result_submitter, result_no_submitter +from pydra.engine.submitter import Submitter +from pydra.engine.specs import ShellDef, ShellOutputs +from fileformats.generic import File +from pydra.engine.environments import Docker +from pydra.design import shell, workflow +from pydra.engine.core import Task +from .utils import no_win, need_docker, run_submitter, run_no_submitter @no_win @@ -17,17 +16,21 @@ def test_docker_1_nosubm(): no submitter """ cmd = "whoami" - docky = ShellCommandTask( - name="docky", executable=cmd, environment=Docker(image="busybox") + Docky = shell.define(cmd) + docky = Docky() + docky_task = Task( + definition=docky, + name="docky", + submitter=Submitter(environment=Docker(image="busybox")), ) - assert docky.environment.image == "busybox" - assert docky.environment.tag == "latest" - assert isinstance(docky.environment, Docker) + assert docky_task.environment.image == "busybox" + assert docky_task.environment.tag == "latest" + assert isinstance(docky_task.environment, Docker) assert docky.cmdline == cmd - res = docky() - assert res.output.stdout == "root\n" - assert res.output.return_code == 0 + res = docky_task.run() + assert res.outputs.stdout == "root\n" + assert res.outputs.return_code == 0 @no_win @@ -37,58 +40,50 @@ def test_docker_1(plugin): using submitter """ cmd = "whoami" - docky = ShellCommandTask( - name="docky", executable=cmd, environment=Docker(image="busybox") - ) + Docky = shell.define(cmd) + docky = Docky() - with Submitter(plugin=plugin) as sub: - docky(submitter=sub) + with Submitter(environment=Docker(image="busybox")) as sub: + res = sub(docky) - res = docky.result() - assert res.output.stdout == "root\n" - assert res.output.return_code == 0 + assert res.outputs.stdout == "root\n" + assert res.outputs.return_code == 0 @no_win @need_docker -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_docker_2(results_function, plugin): +@pytest.mark.parametrize("run_function", [run_no_submitter, run_submitter]) +def test_docker_2(run_function, plugin, tmp_path): """a command with arguments, cmd and args given as 
executable with and without submitter """ - cmd = ["echo", "hail", "pydra"] - docky = ShellCommandTask( - name="docky", executable=cmd, environment=Docker(image="busybox") - ) + cmdline = "echo hail pydra" + Docky = shell.define(cmdline) + docky = Docky() # cmdline doesn't know anything about docker - assert docky.cmdline == " ".join(cmd) - res = results_function(docky, plugin) - assert res.output.stdout.strip() == " ".join(cmd[1:]) - assert res.output.return_code == 0 + assert docky.cmdline == cmdline + outputs = run_function(docky, tmp_path, plugin, environment=Docker(image="busybox")) + assert outputs.stdout.strip() == " ".join(cmdline.split()[1:]) + assert outputs.return_code == 0 @no_win @need_docker -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_docker_2a(results_function, plugin): +@pytest.mark.parametrize("run_function", [run_no_submitter, run_submitter]) +def test_docker_2a(run_function, plugin, tmp_path): """a command with arguments, using executable and args using submitter """ - cmd_exec = "echo" - cmd_args = ["hail", "pydra"] + cmd = ["echo", "hail", "pydra"] # separate command into exec + args - docky = ShellCommandTask( - name="docky", - executable=cmd_exec, - args=cmd_args, - environment=Docker(image="busybox"), - ) - assert docky.inputs.executable == "echo" - assert docky.cmdline == f"{cmd_exec} {' '.join(cmd_args)}" + Docky = shell.define(cmd) + docky = Docky() + assert docky.executable == cmd + assert docky.cmdline == " ".join(cmd) - res = results_function(docky, plugin) - assert res.output.stdout.strip() == " ".join(cmd_args) - assert res.output.return_code == 0 + outputs = run_function(docky, tmp_path, plugin, environment=Docker(image="busybox")) + assert outputs.stdout.strip() == " ".join(cmd[1:]) + assert outputs.return_code == 0 # tests with State @@ -96,21 +91,22 @@ def test_docker_2a(results_function, plugin): @no_win @need_docker -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_docker_st_1(results_function, plugin): +@pytest.mark.parametrize("run_function", [run_no_submitter, run_submitter]) +def test_docker_st_1(run_function, plugin, tmp_path): """commands without arguments in container splitter = executable """ cmd = ["pwd", "whoami"] - docky = ShellCommandTask(name="docky", environment=Docker(image="busybox")).split( - "executable", executable=cmd - ) - assert docky.state.splitter == "docky.executable" + Docky = shell.define("docky") # cmd is just a placeholder + docky = Docky().split(executable=cmd) - res = results_function(docky, plugin) - assert res[0].output.stdout == f"/mnt/pydra{docky.output_dir[0]}\n" - assert res[1].output.stdout == "root\n" - assert res[0].output.return_code == res[1].output.return_code == 0 + outputs = run_function(docky, tmp_path, plugin, environment=Docker(image="busybox")) + assert ( + outputs.stdout[0] + == f"/mnt/pydra{tmp_path}/{attrs.evolve(docky, executable=cmd[0])._checksum}\n" + ) + assert outputs.stdout[1] == "root\n" + assert outputs.return_code[0] == outputs.return_code[1] == 0 # tests with customized output_spec @@ -123,24 +119,11 @@ def test_docker_outputspec_1(plugin, tmp_path): customised output_spec, adding files to the output, providing specific pathname output_path is automatically added to the bindings """ - cmd = ["touch", "newfile_tmp.txt"] - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", File, "newfile_tmp.txt")], - bases=(ShellOutSpec,), - ) - docky = ShellCommandTask( - name="docky", - 
environment=Docker(image="ubuntu"), - executable=cmd, - output_spec=my_output_spec, - ) - - with Submitter(plugin=plugin) as sub: - docky(submitter=sub) + Docky = shell.define("touch ") + docky = Docky() - res = docky.result() - assert res.output.stdout == "" + outputs = docky(plugin=plugin, environment=Docker(image="ubuntu")) + assert outputs.stdout == "" # tests with customised input_spec @@ -148,50 +131,39 @@ def test_docker_outputspec_1(plugin, tmp_path): @no_win @need_docker -def test_docker_inputspec_1(tmp_path): - """a simple customized input spec for docker task""" +def test_docker_inputspec_1(tmp_path, plugin): + """a simple customized input definition for docker task""" filename = str(tmp_path / "file_pydra.txt") with open(filename, "w") as f: f.write("hello from pydra") cmd = "cat" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellSpec,), ) - docky = ShellCommandTask( - name="docky", - environment=Docker(image="busybox"), - executable=cmd, - file=filename, - input_spec=my_input_spec, - strip=True, - ) + docky = Docky(file=filename) - res = docky() - assert res.output.stdout == "hello from pydra" + outputs = docky( + cache_dir=tmp_path, worker=plugin, environment=Docker(image="busybox") + ) + assert outputs.stdout.strip() == "hello from pydra" @no_win @need_docker def test_docker_inputspec_1a(tmp_path): - """a simple customized input spec for docker task + """a simple customized input definition for docker task a default value is used """ filename = str(tmp_path / "file_pydra.txt") @@ -200,37 +172,30 @@ def test_docker_inputspec_1a(tmp_path): cmd = "cat" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - default=filename, - metadata={"position": 1, "argstr": "", "help_string": "input file"}, - ), + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + default=filename, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellSpec,), ) - docky = ShellCommandTask( - name="docky", - environment=Docker(image="busybox"), - executable=cmd, - input_spec=my_input_spec, - strip=True, - ) + docky = Docky() - res = docky() - assert res.output.stdout == "hello from pydra" + outputs = docky(cache_dir=tmp_path, environment=Docker(image="busybox")) + assert outputs.stdout.strip() == "hello from pydra" @no_win @need_docker def test_docker_inputspec_2(plugin, tmp_path): - """a customized input spec with two fields for docker task""" + """a customized input definition with two fields for docker task""" filename_1 = tmp_path / "file_pydra.txt" with open(filename_1, "w") as f: f.write("hello from pydra\n") @@ -241,53 +206,41 @@ def test_docker_inputspec_2(plugin, tmp_path): cmd = "cat" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - attr.ib( - type=File, - metadata={ - "position": 1, - "argstr": "", - "help_string": "input file 1", - }, - ), + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file1", + type=File, + position=1, + argstr="", + help="input file 1", ), - ( - "file2", - attr.ib( - type=File, - default=filename_2, - metadata={ - "position": 2, - "argstr": "", - "help_string": "input file 2", - }, - ), + shell.arg( + name="file2", + type=File, + default=filename_2, + position=2, 
+ argstr="", + help="input file 2", ), ], - bases=(ShellSpec,), + ) + docky = Docky( + file1=filename_1, ) - docky = ShellCommandTask( + outputs = docky( name="docky", environment=Docker(image="busybox"), - executable=cmd, - file1=filename_1, - input_spec=my_input_spec, - strip=True, ) - - res = docky() - assert res.output.stdout == "hello from pydra\nhave a nice one" + assert outputs.stdout.strip() == "hello from pydra\nhave a nice one" @no_win @need_docker def test_docker_inputspec_2a_except(plugin, tmp_path): - """a customized input spec with two fields + """a customized input definition with two fields first one uses a default, and second doesn't - raises a dataclass exception """ filename_1 = tmp_path / "file_pydra.txt" @@ -299,55 +252,42 @@ def test_docker_inputspec_2a_except(plugin, tmp_path): cmd = "cat" - # the field with default value can't be before value without default - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - attr.ib( - type=File, - default=filename_1, - metadata={ - "position": 1, - "argstr": "", - "help_string": "input file 1", - }, - ), + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file1", + type=File, + default=filename_1, + position=1, + argstr="", + help="input file 1", ), - ( - "file2", - attr.ib( - type=File, - metadata={ - "position": 2, - "argstr": "", - "help_string": "input file 2", - }, - ), + shell.arg( + name="file2", + type=File, + position=2, + argstr="", + help="input file 2", ), ], - bases=(ShellSpec,), ) - docky = ShellCommandTask( - name="docky", - environment=Docker(image="busybox"), - executable=cmd, + docky = Docky( file2=filename_2, - input_spec=my_input_spec, - strip=True, ) - assert docky.inputs.file2.fspath == filename_2 + assert docky.file2.fspath == filename_2 - res = docky() - assert res.output.stdout == "hello from pydra\nhave a nice one" + outputs = docky( + cache_dir=tmp_path, worker=plugin, environment=Docker(image="busybox") + ) + assert outputs.stdout.strip() == "hello from pydra\nhave a nice one" @no_win @need_docker def test_docker_inputspec_2a(plugin, tmp_path): - """a customized input spec with two fields + """a customized input definition with two fields first one uses a default value this is fine even if the second field is not using any defaults """ @@ -360,48 +300,33 @@ def test_docker_inputspec_2a(plugin, tmp_path): cmd = "cat" - # if you want set default in the first field you can use default value - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - attr.ib( - type=File, - default=filename_1, - metadata={ - "position": 1, - "argstr": "", - "help_string": "input file 1", - }, - ), + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file1", + type=File, + default=filename_1, + position=1, + argstr="", + help="input file 1", ), - ( - "file2", - attr.ib( - type=File, - metadata={ - "position": 2, - "argstr": "", - "help_string": "input file 2", - }, - ), + shell.arg( + name="file2", + type=File, + position=2, + argstr="", + help="input file 2", ), ], - bases=(ShellSpec,), ) - docky = ShellCommandTask( - name="docky", - environment=Docker(image="busybox"), - executable=cmd, - file2=filename_2, - input_spec=my_input_spec, - strip=True, - ) + docky = Docky(file2=filename_2) - res = docky() - assert res.output.stdout == "hello from pydra\nhave a nice one" + outputs = docky( + cache_dir=tmp_path, worker=plugin, environment=Docker(image="busybox") + ) + assert outputs.stdout.strip() == "hello from pydra\nhave a nice one" @no_win @@ -414,38 +339,26 @@ def 
test_docker_inputspec_3(plugin, tmp_path): cmd = "cat" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - "container_path": True, - }, - ), - ) - ], - bases=(ShellSpec,), - ) - - docky = ShellCommandTask( - name="docky", + inputs = [ + shell.arg( + name="file", + type=File, + position=1, + argstr="", + help="input file", + container_path=True, + ) + ] + + docky = shell.define(cmd, inputs=inputs)( environment=Docker(image="busybox"), - executable=cmd, file=filename, - input_spec=my_input_spec, strip=True, ) cmdline = docky.cmdline - res = docky() - assert "docker" in res.output.stdout + outputs = docky() + assert "docker" in outputs.stdout assert cmdline == docky.cmdline @@ -460,52 +373,32 @@ def test_docker_cmd_inputspec_copyfile_1(plugin, tmp_path): with open(file, "w") as f: f.write("hello from pydra\n") - cmd = ["sed", "-is", "s/hello/hi/"] - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=File, - metadata={ - "position": 1, - "argstr": "", - "help_string": "orig file", - "mandatory": True, - "copyfile": "copy", - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{orig_file}", - "help_string": "output file", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Docky(ShellDef["Docky.Outputs"]): + executable = ["sed", "-is", "s/hello/hi/"] + orig_file: File = shell.arg( + position=1, + argstr="", + help="orig file", + copy_mode="copy", + ) + + class Outputs(ShellOutputs): + out_file: File = shell.outarg( + path_template="{orig_file}.txt", + help="output file", + ) - docky = ShellCommandTask( - name="docky", - environment=Docker(image="busybox"), - executable=cmd, - input_spec=my_input_spec, - orig_file=str(file), - ) + docky = Docky(orig_file=str(file)) - res = docky() - assert res.output.stdout == "" - out_file = res.output.out_file.fspath + outputs = docky( + cache_dir=tmp_path, worker=plugin, environment=Docker(image="busybox") + ) + assert outputs.stdout == "" + out_file = outputs.out_file.fspath assert out_file.exists() - # the file is copied, and than it is changed in place - assert out_file.parent == docky.output_dir + # the file is copied, and then it is changed in place + assert out_file.parent.parent == tmp_path with open(out_file) as f: assert "hi from pydra\n" == f.read() # the original file is unchanged @@ -516,7 +409,7 @@ def test_docker_cmd_inputspec_copyfile_1(plugin, tmp_path): @no_win @need_docker def test_docker_inputspec_state_1(plugin, tmp_path): - """a customised input spec for a docker file with a splitter, + """a customised input definition for a docker file with a splitter, splitter is on files """ filename_1 = tmp_path / "file_pydra.txt" @@ -528,43 +421,33 @@ def test_docker_inputspec_state_1(plugin, tmp_path): cmd = "cat" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellSpec,), ) - docky = ShellCommandTask( - name="docky", - environment=Docker(image="busybox"), - executable=cmd, - input_spec=my_input_spec, - strip=True, - ).split("file", file=[str(filename_1), str(filename_2)]) + docky = Docky().split(file=[str(filename_1), 
str(filename_2)]) - res = docky() - assert res[0].output.stdout == "hello from pydra" - assert res[1].output.stdout == "have a nice one" + outputs = docky( + worker=plugin, cache_dir=tmp_path, environment=Docker(image="busybox") + ) + assert outputs.stdout[0].strip() == "hello from pydra" + assert outputs.stdout[1].strip() == "have a nice one" @no_win @need_docker def test_docker_inputspec_state_1b(plugin, tmp_path): - """a customised input spec for a docker file with a splitter, - files from the input spec have the same path in the local os and the container, + """a customised input definition for a docker file with a splitter, + files from the input definition have the same path in the local os and the container, so hash is calculated and the test works fine """ file_1 = tmp_path / "file_pydra.txt" @@ -575,96 +458,71 @@ def test_docker_inputspec_state_1b(plugin, tmp_path): f.write("have a nice one") cmd = "cat" - filename = [] - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), + + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellSpec,), ) + docky = Docky().split(file=[str(file_1), str(file_2)]) - docky = ShellCommandTask( - name="docky", - environment=Docker(image="busybox"), - executable=cmd, - input_spec=my_input_spec, - strip=True, - ).split("file", file=[str(file_1), str(file_2)]) - - res = docky() - assert res[0].output.stdout == "hello from pydra" - assert res[1].output.stdout == "have a nice one" + outputs = docky( + cache_dir=tmp_path, worker=plugin, environment=Docker(image="busybox") + ) + assert outputs.stdout[0].strip() == "hello from pydra" + assert outputs.stdout[1].strip() == "have a nice one" @no_win @need_docker def test_docker_wf_inputspec_1(plugin, tmp_path): - """a customized input spec for workflow with docker tasks""" + """a customized input definition for workflow with docker tasks""" filename = tmp_path / "file_pydra.txt" with open(filename, "w") as f: f.write("hello from pydra") cmd = "cat" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellSpec,), ) - wf = Workflow(name="wf", input_spec=["cmd", "file"]) - wf.inputs.cmd = cmd - wf.inputs.file = filename + @workflow.define + def Workflow(file): - docky = ShellCommandTask( - name="docky", - environment=Docker(image="busybox"), - executable=wf.lzin.cmd, - file=wf.lzin.file, - input_spec=my_input_spec, - strip=True, - ) - wf.add(docky) + docky = workflow.add( + Docky(file=file), + environment=Docker(image="busybox"), + ) - wf.set_output([("out", wf.docky.lzout.stdout)]) + return docky.stdout - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) + wf = Workflow(file=filename) - res = wf.result() - assert res.output.out == "hello from pydra" + outputs = wf(cache_dir=tmp_path) + assert outputs.out.strip() == "hello from pydra" @no_win @need_docker def test_docker_wf_state_inputspec_1(plugin, tmp_path): - """a customized input spec for workflow with docker tasks that has a state""" + """a customized input definition for workflow with docker tasks that has a state""" file_1 = 
tmp_path / "file_pydra.txt" file_2 = tmp_path / "file_nice.txt" with open(file_1, "w") as f: @@ -674,53 +532,41 @@ def test_docker_wf_state_inputspec_1(plugin, tmp_path): cmd = "cat" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellSpec,), ) - wf = Workflow(name="wf", input_spec=["cmd", "file"]) - wf.split(file=[str(file_1), str(file_2)]) - wf.inputs.cmd = cmd + @workflow.define + def Workflow(file): - docky = ShellCommandTask( - name="docky", - environment=Docker(image="busybox"), - executable=wf.lzin.cmd, - file=wf.lzin.file, - input_spec=my_input_spec, - strip=True, - ) - wf.add(docky) + docky = workflow.add( + Docky(file=file), + environment=Docker(image="busybox"), + ) - wf.set_output([("out", wf.docky.lzout.stdout)]) + return docky.stdout - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) + wf = Workflow().split(file=[file_1, file_2]) - res = wf.result() - assert res[0].output.out == "hello from pydra" - assert res[1].output.out == "have a nice one" + outputs = wf(cache_dir=tmp_path) + + assert outputs.out[0].strip() == "hello from pydra" + assert outputs.out[1].strip() == "have a nice one" @no_win @need_docker def test_docker_wf_ndst_inputspec_1(plugin, tmp_path): - """a customized input spec for workflow with docker tasks with states""" + """a customized input definition for workflow with docker tasks with states""" file_1 = tmp_path / "file_pydra.txt" file_2 = tmp_path / "file_nice.txt" with open(file_1, "w") as f: @@ -730,42 +576,30 @@ def test_docker_wf_ndst_inputspec_1(plugin, tmp_path): cmd = "cat" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellSpec,), ) - wf = Workflow(name="wf", input_spec=["cmd", "file"]) - wf.inputs.cmd = cmd + @workflow.define + def Workflow(file): - docky = ShellCommandTask( - name="docky", - environment=Docker(image="busybox"), - executable=wf.lzin.cmd, - file=wf.lzin.file, - input_spec=my_input_spec, - strip=True, - ).split("file", file=[str(file_1), str(file_2)]) - wf.add(docky) + docky = workflow.add( + Docky(file=file), + environment=Docker(image="busybox"), + ) - wf.set_output([("out", wf.docky.lzout.stdout)]) + return docky.stdout - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) + wf = Workflow().split(file=[str(file_1), str(file_2)]) - res = wf.result() - assert res.output.out == ["hello from pydra", "have a nice one"] + outputs = wf(cache_dir=tmp_path) + assert outputs.out == ["hello from pydra", "have a nice one"] diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index bd05d9daed..d0cbd7f63a 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -1,16 +1,13 @@ from pathlib import Path - -from ..environments import Native, Docker, Singularity -from ..task import ShellCommandTask -from ..submitter import Submitter -from ..specs import ( - ShellSpec, - SpecInfo, - File, -) +import typing as ty +from pydra.engine.environments import Native, 
Docker, Singularity +from pydra.engine.submitter import Submitter +from fileformats.generic import File +from pydra.design import shell +from pydra.engine.core import Task +from pydra.engine.task import ShellDef +from pydra.engine.helpers import attrs_values from .utils import no_win, need_docker, need_singularity - -import attr import pytest @@ -20,62 +17,72 @@ def makedir(path, name): return newdir +def drop_stderr(dct: dict[str, ty.Any]): + return {k: v for k, v in dct.items() if k != "stderr"} + + def test_native_1(tmp_path): """simple command, no arguments""" - newcache = lambda x: makedir(tmp_path, x) - cmd = ["whoami"] - shelly = ShellCommandTask( - name="shelly", executable=cmd, cache_dir=newcache("shelly") - ) - assert shelly.cmdline == " ".join(cmd) + def newcache(x): + return makedir(tmp_path, x) - env_res = Native().execute(shelly) - shelly() - assert env_res == shelly.output_ + cmd = "whoami" + Shelly = shell.define(cmd) + shelly = Shelly() + assert shelly.cmdline == cmd - shelly_call = ShellCommandTask( - name="shelly_call", executable=cmd, cache_dir=newcache("shelly_call") + shelly_job = Task( + definition=shelly, + submitter=Submitter(cache_dir=newcache("native-task")), + name="native", ) - shelly_call(environment=Native()) - assert env_res == shelly_call.output_ + env_outputs = Native().execute(shelly_job) - shelly_subm = ShellCommandTask( - name="shelly_subm", executable=cmd, cache_dir=newcache("shelly_subm") - ) - with Submitter(plugin="cf") as sub: - shelly_subm(submitter=sub, environment=Native()) - assert env_res == shelly_subm.result().output.__dict__ + outputs = shelly(cache_dir=newcache("native-exec")) + assert drop_stderr(env_outputs) == drop_stderr(attrs_values(outputs)) + + outputs = shelly(environment=Native(), cache_dir=newcache("native-call")) + assert env_outputs == attrs_values(outputs) + + with Submitter(cache_dir=newcache("native-submitter"), environment=Native()) as sub: + result = sub(shelly) + assert drop_stderr(env_outputs) == drop_stderr(attrs_values(result.outputs)) @no_win @need_docker def test_docker_1(tmp_path): """docker env: simple command, no arguments""" - newcache = lambda x: makedir(tmp_path, x) - cmd = ["whoami"] + def newcache(x): + makedir(tmp_path, x) + + cmd = "whoami" docker = Docker(image="busybox") - shelly = ShellCommandTask( - name="shelly", executable=cmd, cache_dir=newcache("shelly") - ) - assert shelly.cmdline == " ".join(cmd) - env_res = docker.execute(shelly) - - shelly_env = ShellCommandTask( - name="shelly", - executable=cmd, - cache_dir=newcache("shelly_env"), - environment=docker, - ) - shelly_env() - assert env_res == shelly_env.output_ == shelly_env.result().output.__dict__ + Shelly = shell.define(cmd) + shelly = Shelly() + assert shelly.cmdline == cmd - shelly_call = ShellCommandTask( - name="shelly", executable=cmd, cache_dir=newcache("shelly_call") + shelly_job = Task( + definition=shelly, + submitter=Submitter(cache_dir=newcache("docker")), + name="docker", ) - shelly_call(environment=docker) - assert env_res == shelly_call.output_ == shelly_call.result().output.__dict__ + outputs_dict = docker.execute(shelly_job) + + with Submitter(cache_dir=newcache("docker_sub"), environment=docker) as sub: + result = sub(shelly) + + outputs = shelly(environment=docker, cache_dir=newcache("docker_call")) + # If busybox isn't found locally, then the stderr will have the download progress from + # the Docker auto-pull in it + for key in ["stdout", "return_code"]: + assert ( + outputs_dict[key] + == attrs_values(outputs)[key] + == 
attrs_values(result.outputs)[key] + ) @no_win @@ -90,160 +97,144 @@ def test_docker_1(tmp_path): ) def test_docker_1_subm(tmp_path, docker): """docker env with submitter: simple command, no arguments""" - newcache = lambda x: makedir(tmp_path, x) - cmd = ["whoami"] + def newcache(x): + makedir(tmp_path, x) + + cmd = "whoami" docker = Docker(image="busybox") - shelly = ShellCommandTask( - name="shelly", executable=cmd, cache_dir=newcache("shelly") - ) - assert shelly.cmdline == " ".join(cmd) - env_res = docker.execute(shelly) - - shelly_env = ShellCommandTask( - name="shelly", - executable=cmd, - cache_dir=newcache("shelly_env"), - environment=docker, + shelly = shell.define(cmd)() + shelly_job = Task( + definition=shelly, + submitter=Submitter(cache_dir=newcache("docker")), + name="docker", ) - with Submitter(plugin="cf") as sub: - shelly_env(submitter=sub) - assert env_res == shelly_env.result().output.__dict__ + assert shelly.cmdline == cmd + outputs_dict = docker.execute(shelly_job) - shelly_call = ShellCommandTask( - name="shelly", executable=cmd, cache_dir=newcache("shelly_call") - ) - with Submitter(plugin="cf") as sub: - shelly_call(submitter=sub, environment=docker) - assert env_res == shelly_call.result().output.__dict__ + with Submitter( + worker="cf", cache_dir=newcache("docker_sub"), environment=docker + ) as sub: + result = sub(shelly) + assert outputs_dict == attrs_values(result.outputs) + + outputs = shelly(cache_dir=newcache("docker_call"), environment=docker) + assert outputs_dict == attrs_values(outputs) @no_win @need_singularity def test_singularity_1(tmp_path): """singularity env: simple command, no arguments""" - newcache = lambda x: makedir(tmp_path, x) - cmd = ["whoami"] - sing = Singularity(image="docker://alpine") - shelly = ShellCommandTask( - name="shelly", executable=cmd, cache_dir=newcache("shelly") - ) - assert shelly.cmdline == " ".join(cmd) - env_res = sing.execute(shelly) - - shelly_env = ShellCommandTask( - name="shelly", - executable=cmd, - cache_dir=newcache("shelly_env"), - environment=sing, - ) - shelly_env() - assert env_res == shelly_env.output_ == shelly_env.result().output.__dict__ + def newcache(x): + makedir(tmp_path, x) - shelly_call = ShellCommandTask( - name="shelly", executable=cmd, cache_dir=newcache("shelly_call") + cmd = "whoami" + sing = Singularity(image="docker://alpine", xargs=["--fakeroot"]) + Shelly = shell.define(cmd) + shelly = Shelly() + shelly_job = Task( + definition=shelly, + submitter=Submitter(cache_dir=newcache("singu")), + name="singu", ) - shelly_call(environment=sing) - assert env_res == shelly_call.output_ == shelly_call.result().output.__dict__ + assert shelly.cmdline == cmd + outputs_dict = sing.execute(shelly_job) + + with Submitter(cache_dir=newcache("singu_sub"), environment=sing) as sub: + results = sub(shelly) + assert drop_stderr(outputs_dict) == drop_stderr(attrs_values(results.outputs)) + + outputs = shelly(environment=sing, cache_dir=newcache("singu_call")) + assert drop_stderr(outputs_dict) == drop_stderr(attrs_values(outputs)) @no_win @need_singularity def test_singularity_1_subm(tmp_path, plugin): """docker env with submitter: simple command, no arguments""" - newcache = lambda x: makedir(tmp_path, x) - cmd = ["whoami"] - sing = Singularity(image="docker://alpine") - shelly = ShellCommandTask( - name="shelly", executable=cmd, cache_dir=newcache("shelly") - ) - assert shelly.cmdline == " ".join(cmd) - env_res = sing.execute(shelly) - - shelly_env = ShellCommandTask( - name="shelly", - executable=cmd, - 
cache_dir=newcache("shelly_env"), - environment=sing, - ) - with Submitter(plugin=plugin) as sub: - shelly_env(submitter=sub) - assert env_res == shelly_env.result().output.__dict__ + def newcache(x): + makedir(tmp_path, x) - shelly_call = ShellCommandTask( - name="shelly", executable=cmd, cache_dir=newcache("shelly_call") + cmd = "whoami" + sing = Singularity(image="docker://alpine", xargs=["--fakeroot"]) + Shelly = shell.define(cmd) + shelly = Shelly() + shelly_job = Task( + definition=shelly, + submitter=Submitter(cache_dir=newcache("singu")), + name="singu", ) - with Submitter(plugin=plugin) as sub: - shelly_call(submitter=sub, environment=sing) - for key in [ - "stdout", - "return_code", - ]: # singularity gives info about cashed image in stderr - assert env_res[key] == shelly_call.result().output.__dict__[key] + assert shelly.cmdline == cmd + outputs_dict = sing.execute(shelly_job) + with Submitter( + worker=plugin, environment=sing, cache_dir=newcache("singu_sub") + ) as sub: + results = sub(shelly) + assert drop_stderr(outputs_dict) == drop_stderr(attrs_values(results.outputs)) -def create_shelly_inputfile(tempdir, filename, name, executable): + outputs = shelly(environment=sing, cache_dir=newcache("singu_call")) + # singularity gives info about cashed image in stderr + assert drop_stderr(outputs_dict) == drop_stderr(attrs_values(outputs)) + + +def shelly_with_input_factory(filename, executable) -> ShellDef: """creating a task with a simple input_spec""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "position": 1, - "help_string": "files", - "mandatory": True, - "argstr": "", - }, - ), + Shelly = shell.define( + executable, + inputs=[ + shell.arg( + name="file", + type=File, + position=1, + help="files", + argstr="", ) ], - bases=(ShellSpec,), ) + return Shelly(**({} if filename is None else {"file": filename})) + - kwargs = {} if filename is None else {"file": filename} - shelly = ShellCommandTask( +def make_job(task: ShellDef, tempdir: Path, name: str): + return Task( + definition=task, + submitter=Submitter(cache_dir=makedir(tempdir, name)), name=name, - executable=executable, - cache_dir=makedir(tempdir, name), - input_spec=my_input_spec, - **kwargs, ) - return shelly def test_shell_fileinp(tmp_path): """task with a file in the command/input""" + + def newcache(x): + return makedir(tmp_path, x) + input_dir = makedir(tmp_path, "inputs") filename = input_dir / "file.txt" with open(filename, "w") as f: f.write("hello ") - shelly = create_shelly_inputfile( - tempdir=tmp_path, filename=filename, name="shelly", executable=["cat"] - ) - env_res = Native().execute(shelly) + shelly = shelly_with_input_factory(filename=filename, executable="cat") + shelly_job = make_job(shelly, tmp_path, "native") + outputs_dict = Native().execute(shelly_job) - shelly_env = create_shelly_inputfile( - tempdir=tmp_path, filename=filename, name="shelly_env", executable=["cat"] - ) - shelly_env.environment = Native() - shelly_env() - assert env_res == shelly_env.output_ == shelly_env.result().output.__dict__ + with Submitter(environment=Native(), cache_dir=newcache("native_sub")) as sub: + results = sub(shelly) + assert outputs_dict == attrs_values(results.outputs) - shelly_call = create_shelly_inputfile( - tempdir=tmp_path, filename=filename, name="shelly_call", executable=["cat"] - ) - shelly_call(environment=Native()) - assert env_res == shelly_call.output_ == shelly_call.result().output.__dict__ + outputs = shelly(environment=Native(), 
cache_dir=newcache("native_call")) + assert outputs_dict == attrs_values(outputs) def test_shell_fileinp_st(tmp_path): """task (with a splitter) with a file in the command/input""" + + def newcache(x): + return makedir(tmp_path, x) + input_dir = makedir(tmp_path, "inputs") filename_1 = input_dir / "file_1.txt" with open(filename_1, "w") as f: @@ -255,28 +246,25 @@ def test_shell_fileinp_st(tmp_path): filename = [filename_1, filename_2] - shelly_env = create_shelly_inputfile( - tempdir=tmp_path, filename=None, name="shelly_env", executable=["cat"] - ) - shelly_env.environment = Native() - shelly_env.split(file=filename) - shelly_env() - assert shelly_env.result()[0].output.stdout.strip() == "hello" - assert shelly_env.result()[1].output.stdout.strip() == "hi" - - shelly_call = create_shelly_inputfile( - tempdir=tmp_path, filename=None, name="shelly_call", executable=["cat"] + shelly = shelly_with_input_factory(filename=None, executable="cat") + with Submitter(environment=Native(), cache_dir=newcache("native")) as sub: + results = sub(shelly.split(file=filename)) + assert [s.strip() for s in results.outputs.stdout] == ["hello", "hi"] + + outputs = shelly.split(file=filename)( + environment=Native(), cache_dir=newcache("native_call") ) - shelly_call.split(file=filename) - shelly_call(environment=Native()) - assert shelly_call.result()[0].output.stdout.strip() == "hello" - assert shelly_call.result()[1].output.stdout.strip() == "hi" + assert [s.strip() for s in outputs.stdout] == ["hello", "hi"] @no_win @need_docker def test_docker_fileinp(tmp_path): """docker env: task with a file in the command/input""" + + def newcache(x): + return makedir(tmp_path, x) + docker = Docker(image="busybox") input_dir = makedir(tmp_path, "inputs") @@ -284,30 +272,26 @@ def test_docker_fileinp(tmp_path): with open(filename, "w") as f: f.write("hello ") - shelly = create_shelly_inputfile( - tempdir=tmp_path, filename=filename, name="shelly", executable=["cat"] - ) - env_res = docker.execute(shelly) + shelly = shelly_with_input_factory(filename=filename, executable="cat") + outputs_dict = docker.execute(make_job(shelly, tmp_path, "docker")) - shelly_env = create_shelly_inputfile( - tempdir=tmp_path, filename=filename, name="shelly_env", executable=["cat"] - ) - shelly_env.environment = docker - shelly_env() + with Submitter(environment=docker, cache_dir=newcache("shell_sub")) as sub: + results = sub(shelly) - assert env_res == shelly_env.output_ == shelly_env.result().output.__dict__ + assert outputs_dict == attrs_values(results.outputs) - shelly_call = create_shelly_inputfile( - tempdir=tmp_path, filename=filename, name="shelly_call", executable=["cat"] - ) - shelly_call(environment=docker) - assert env_res == shelly_call.output_ == shelly_call.result().output.__dict__ + outputs = shelly(environment=docker, cache_dir=newcache("docker_call")) + assert outputs_dict == attrs_values(outputs) @no_win @need_docker def test_docker_fileinp_subm(tmp_path, plugin): """docker env with a submitter: task with a file in the command/input""" + + def newcache(x): + return makedir(tmp_path, x) + docker = Docker(image="busybox") input_dir = makedir(tmp_path, "inputs") @@ -315,31 +299,30 @@ def test_docker_fileinp_subm(tmp_path, plugin): with open(filename, "w") as f: f.write("hello ") - shelly = create_shelly_inputfile( - tempdir=tmp_path, filename=filename, name="shelly", executable=["cat"] - ) - env_res = docker.execute(shelly) + shelly = shelly_with_input_factory(filename=filename, executable="cat") + shelly_job = 
make_job(shelly, tmp_path, "docker_job") + outputs_dict = docker.execute(shelly_job) - shelly_env = create_shelly_inputfile( - tempdir=tmp_path, filename=filename, name="shelly_env", executable=["cat"] - ) - shelly_env.environment = docker - with Submitter(plugin=plugin) as sub: - shelly_env(submitter=sub) - assert env_res == shelly_env.result().output.__dict__ + with Submitter( + environment=docker, cache_dir=newcache("docker_sub"), worker=plugin + ) as sub: + results = sub(shelly) + with Submitter(worker=plugin) as sub: + results = sub(shelly) + assert outputs_dict == attrs_values(results.outputs) - shelly_call = create_shelly_inputfile( - tempdir=tmp_path, filename=filename, name="shelly_call", executable=["cat"] - ) - with Submitter(plugin=plugin) as sub: - shelly_call(submitter=sub, environment=docker) - assert env_res == shelly_call.result().output.__dict__ + outputs = shelly(environment=docker, cache_dir=newcache("docker_call")) + assert outputs_dict == attrs_values(outputs) @no_win @need_docker def test_docker_fileinp_st(tmp_path): """docker env: task (with a splitter) with a file in the command/input""" + + def newcache(x): + return makedir(tmp_path, x) + docker = Docker(image="busybox") input_dir = makedir(tmp_path, "inputs") @@ -353,64 +336,54 @@ def test_docker_fileinp_st(tmp_path): filename = [filename_1, filename_2] - shelly_env = create_shelly_inputfile( - tempdir=tmp_path, filename=None, name="shelly_env", executable=["cat"] - ) - shelly_env.environment = docker - shelly_env.split(file=filename) - shelly_env() - assert shelly_env.result()[0].output.stdout.strip() == "hello" - assert shelly_env.result()[1].output.stdout.strip() == "hi" - - shelly_call = create_shelly_inputfile( - tempdir=tmp_path, filename=None, name="shelly_call", executable=["cat"] + shelly = shelly_with_input_factory(filename=None, executable="cat") + + with Submitter(environment=docker, cache_dir=newcache("docker_sub")) as sub: + results = sub(shelly.split(file=filename)) + + assert [s.strip() for s in results.outputs.stdout] == ["hello", "hi"] + + outputs = shelly.split(file=filename)( + environment=docker, cache_dir=newcache("docker_call") ) - shelly_call.split(file=filename) - shelly_call(environment=docker) - assert shelly_call.result()[0].output.stdout.strip() == "hello" - assert shelly_call.result()[1].output.stdout.strip() == "hi" + assert [s.strip() for s in outputs.stdout] == ["hello", "hi"] -def create_shelly_outputfile(tempdir, filename, name, executable="cp"): +def shelly_outputfile_factory(filename, executable="cp"): """creating a task with an input_spec that contains a template""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file_orig", - attr.ib( - type=File, - metadata={"position": 2, "help_string": "new file", "argstr": ""}, - ), + Shelly = shell.define( + executable, + inputs=[ + shell.arg( + name="file_orig", + type=File, + position=1, + help="new file", + argstr="", ), - ( - "file_copy", - attr.ib( - type=str, - metadata={ - "output_file_template": "{file_orig}_copy", - "help_string": "output file", - "argstr": "", - }, - ), + ], + outputs=[ + shell.outarg( + name="file_copy", + type=File, + path_template="{file_orig}_copy", + help="output file", + argstr="", + position=2, + keep_extension=True, ), ], - bases=(ShellSpec,), ) - kwargs = {} if filename is None else {"file_orig": filename} - shelly = ShellCommandTask( - name=name, - executable=executable, - cache_dir=makedir(tempdir, name), - input_spec=my_input_spec, - **kwargs, - ) - return shelly + return Shelly(**({} 
if filename is None else {"file_orig": filename})) def test_shell_fileout(tmp_path): """task with a file in the output""" + + def newcache(x): + return Path(makedir(tmp_path, x)) + input_dir = makedir(tmp_path, "inputs") filename = input_dir / "file.txt" with open(filename, "w") as f: @@ -418,31 +391,27 @@ def test_shell_fileout(tmp_path): # execute does not create the cashedir, so this part will fail, # but I guess we don't want to use it this way anyway - # shelly = create_shelly_outputfile(tempdir=tmp_path, filename=filename, name="shelly") - # env_res = Native().execute(shelly) + # shelly = create_shelly_outputfile(tempdir=tmp_path, filename=filename, name="native") + # outputs_dict = Native().execute(shelly) - shelly_env = create_shelly_outputfile( - tempdir=tmp_path, filename=filename, name="shelly_env" - ) - shelly_env.environment = Native() - shelly_env() - assert ( - Path(shelly_env.result().output.file_copy) - == shelly_env.output_dir / "file_copy.txt" - ) + shelly = shelly_outputfile_factory(filename=filename) - shelly_call = create_shelly_outputfile( - tempdir=tmp_path, filename=filename, name="shelly_call" - ) - shelly_call(environment=Native()) - assert ( - Path(shelly_call.result().output.file_copy) - == shelly_call.output_dir / "file_copy.txt" - ) + with Submitter(environment=Native(), cache_dir=newcache("native_sub")) as sub: + result = sub(shelly) + assert Path(result.outputs.file_copy) == result.output_dir / "file_copy.txt" + + call_cache = newcache("native_call") + + outputs = shelly(environment=Native(), cache_dir=call_cache) + assert Path(outputs.file_copy) == call_cache / shelly._checksum / "file_copy.txt" def test_shell_fileout_st(tmp_path): """task (with a splitter) with a file in the output""" + + def newcache(x): + return Path(makedir(tmp_path, x)) + input_dir = makedir(tmp_path, "inputs") filename_1 = input_dir / "file_1.txt" with open(filename_1, "w") as f: @@ -454,40 +423,31 @@ def test_shell_fileout_st(tmp_path): filename = [filename_1, filename_2] - shelly_env = create_shelly_outputfile( - tempdir=tmp_path, filename=None, name="shelly_env" - ) - shelly_env.environment = Native() - shelly_env.split(file_orig=filename) - shelly_env() - assert ( - Path(shelly_env.result()[0].output.file_copy) - == shelly_env.output_dir[0] / "file_1_copy.txt" - ) - assert ( - Path(shelly_env.result()[1].output.file_copy) - == shelly_env.output_dir[1] / "file_2_copy.txt" - ) + shelly = shelly_outputfile_factory(filename=None) + with Submitter(environment=Native(), cache_dir=newcache("native")) as sub: + results = sub(shelly.split(file_orig=filename)) - shelly_call = create_shelly_outputfile( - tempdir=tmp_path, filename=None, name="shelly_call" - ) - shelly_call.split(file_orig=filename) - shelly_call(environment=Native()) - assert ( - Path(shelly_call.result()[0].output.file_copy) - == shelly_call.output_dir[0] / "file_1_copy.txt" - ) - assert ( - Path(shelly_call.result()[1].output.file_copy) - == shelly_call.output_dir[1] / "file_2_copy.txt" + assert [f.name for f in results.outputs.file_copy] == [ + "file_1_copy.txt", + "file_2_copy.txt", + ] + + call_cache = newcache("native_call") + + outputs = shelly.split(file_orig=filename)( + environment=Native(), cache_dir=call_cache ) + assert [f.name for f in outputs.file_copy] == ["file_1_copy.txt", "file_2_copy.txt"] @no_win @need_docker def test_docker_fileout(tmp_path): """docker env: task with a file in the output""" + + def newcache(x): + return Path(makedir(tmp_path, x)) + docker_env = Docker(image="busybox") input_dir = 
makedir(tmp_path, "inputs") @@ -495,21 +455,21 @@ def test_docker_fileout(tmp_path): with open(filename, "w") as f: f.write("hello ") - shelly_env = create_shelly_outputfile( - tempdir=tmp_path, filename=filename, name="shelly_env" - ) - shelly_env.environment = docker_env - shelly_env() - assert ( - Path(shelly_env.result().output.file_copy) - == shelly_env.output_dir / "file_copy.txt" - ) + shelly = shelly_outputfile_factory(filename=filename) + + with Submitter(environment=docker_env, cache_dir=newcache("docker")) as sub: + results = sub(shelly) + assert results.outputs.file_copy == File(results.output_dir / "file_copy.txt") @no_win @need_docker def test_docker_fileout_st(tmp_path): """docker env: task (with a splitter) with a file in the output""" + + def newcache(x): + return Path(makedir(tmp_path, x)) + docker_env = Docker(image="busybox") input_dir = makedir(tmp_path, "inputs") @@ -523,17 +483,11 @@ def test_docker_fileout_st(tmp_path): filename = [filename_1, filename_2] - shelly_env = create_shelly_outputfile( - tempdir=tmp_path, filename=None, name="shelly_env" - ) - shelly_env.environment = docker_env - shelly_env.split(file_orig=filename) - shelly_env() - assert ( - Path(shelly_env.result()[0].output.file_copy) - == shelly_env.output_dir[0] / "file_1_copy.txt" - ) - assert ( - Path(shelly_env.result()[1].output.file_copy) - == shelly_env.output_dir[1] / "file_2_copy.txt" - ) + shelly = shelly_outputfile_factory(filename=None) + + with Submitter(environment=docker_env, cache_dir=newcache("docker_sub")) as sub: + results = sub(shelly.split(file_orig=filename)) + assert [f.name for f in results.outputs.file_copy] == [ + "file_1_copy.txt", + "file_2_copy.txt", + ] diff --git a/pydra/engine/tests/test_functions.py b/pydra/engine/tests/test_functions.py new file mode 100644 index 0000000000..ab9ee9cd43 --- /dev/null +++ b/pydra/engine/tests/test_functions.py @@ -0,0 +1,240 @@ +import pytest +import random +import typing as ty +from pydra.design.base import Field +from pydra.design import python +from pydra.engine.specs import PythonDef, PythonOutputs +from pydra.engine.helpers import list_fields, attrs_values + + +def non_func_fields(defn: PythonDef) -> list[Field]: + return [f for f in list_fields(defn) if f.name != "function"] + + +def non_func_values(defn: PythonDef) -> dict: + return {n: v for n, v in attrs_values(defn).items() if n != "function"} + + +def hashes(defn: PythonDef) -> dict[str, str]: + return defn._compute_hashes()[1] + + +def test_task_equivalence(): + """testing equivalence of tasks created in different ways""" + + def add_two(a: int) -> int: + return a + 2 + + @python.define + class Canonical(PythonDef["Canonical.Outputs"]): + + a: ty.Any + + class Outputs(PythonOutputs): + out: int + + @staticmethod + def function(a: int) -> int: + return a + 2 + + canonical = Canonical(a=3) + + decorated1 = python.define(add_two)(a=3) + + @python.define + def addtwo(a: int) -> int: + return a + 2 + + decorated2 = addtwo(a=3) + + assert canonical._compute_hashes()[1] == decorated1._compute_hashes()[1] + assert canonical._compute_hashes()[1] == decorated2._compute_hashes()[1] + + c_outputs = canonical() + d1_outputs = decorated1() + d2_outputs = decorated2() + + assert ( + non_func_values(c_outputs) + == non_func_values(d1_outputs) + == non_func_values(d2_outputs) + ) + + +def test_annotation_equivalence_1(): + """testing various ways of annotation: one output, only types provided""" + + def direct(a: int) -> int: + return a + 2 + + Direct = python.define(direct) + + 
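+    # Illustrative sketch of the equivalence exercised by this test: python.define can
+    # wrap an existing function (as with ``Direct`` above) or be applied as a decorator
+    # (as with ``Partial`` and ``Indirect`` below); either way it builds a task
+    # definition that is instantiated with its inputs and then called to run, e.g.
+    #
+    #     defn = python.define(direct)(a=3)   # bind the input ``a``
+    #     outputs = defn()                    # execute; direct(3) == 5, so
+    #     assert outputs.out == 5             # the default single output is named ``out``
+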
@python.define(outputs={"out": int}) + def Partial(a: int): + return a + 2 + + @python.define(inputs={"a": int}, outputs={"out": int}) + def Indirect(a): + return a + 2 + + assert non_func_fields(Direct) == non_func_fields(Partial) + assert non_func_fields(Direct) == non_func_fields(Indirect) + + assert list_fields(Direct.Outputs) == list_fields(Partial.Outputs) + assert list_fields(Direct.Outputs) == list_fields(Indirect.Outputs) + + # Run functions to ensure behavior is unaffected + a = random.randint(0, (1 << 32) - 3) + assert non_func_values(Direct(a=a)) == non_func_values(Partial(a=a)) + assert non_func_values(Direct(a=a)) == non_func_values(Indirect(a=a)) + + # checking if the annotation is properly converted to output_spec if used in task + assert list_fields(Direct.Outputs)[0] == python.out(name="out", type=int) + + +def test_annotation_equivalence_2(): + """testing various ways of annotation: multiple outputs, using a tuple for output annot.""" + + def direct(a: int) -> tuple[int, float]: + return a + 2, a + 2.0 + + Direct = python.define(direct, outputs=["out1", "out2"]) + + @python.define(outputs={"out1": int, "out2": float}) + def Partial(a: int): + return a + 2, a + 2.0 + + @python.define(inputs={"a": int}, outputs=["out1", "out2"]) + def Indirect(a) -> tuple[int, float]: + return a + 2, a + 2.0 + + # checking if the annotations are equivalent + assert ( + non_func_fields(Direct) == non_func_fields(Partial) == non_func_fields(Indirect) + ) + + # Run functions to ensure behavior is unaffected + a = random.randint(0, (1 << 32) - 3) + assert hashes(Direct(a=a)) == hashes(Partial(a=a)) == hashes(Indirect(a=a)) + + # checking if the annotation is properly converted to output_spec if used in task + assert list_fields(Direct.Outputs) == [ + python.out(name="out1", type=int), + python.out(name="out2", type=float), + ] + + +def test_annotation_equivalence_3(): + """testing various ways of annotation: using dictionary for output annot.""" + + def direct(a: int) -> int: + return a + 2 + + Direct = python.define(direct, outputs=["out1"]) + + @python.define(outputs={"out1": int}) + def Partial(a: int): + return a + 2 + + @python.define(inputs={"a": int}, outputs={"out1": int}) + def Indirect(a): + return a + 2 + + # checking if the annotations are equivalent + assert ( + non_func_fields(Direct) == non_func_fields(Partial) == non_func_fields(Indirect) + ) + + # Run functions to ensure behavior is unaffected + a = random.randint(0, (1 << 32) - 3) + assert hashes(Direct(a=a)) == hashes(Partial(a=a)) == hashes(Indirect(a=a)) + + # checking if the annotation is properly converted to output_spec if used in task + assert list_fields(Direct.Outputs)[0] == python.out(name="out1", type=int) + + +def test_annotation_equivalence_4(): + """testing various ways of annotation: using ty.NamedTuple for the output""" + + @python.define(outputs=["sum", "sub"]) + def Direct(a: int) -> tuple[int, int]: + return a + 2, a - 2 + + @python.define(outputs={"sum": int, "sub": int}) + def Partial(a: int): + return a + 2, a - 2 + + @python.define(inputs={"a": int}, outputs={"sum": int, "sub": int}) + def Indirect(a): + return a + 2, a - 2 + + # checking if the annotations are equivalent + assert ( + list_fields(Direct.Outputs) + == list_fields(Partial.Outputs) + == list_fields(Indirect.Outputs) + ) + assert ( + list_fields(Direct.Outputs) + == list_fields(Partial.Outputs) + == list_fields(Indirect.Outputs) + ) + + # Run functions to ensure behavior is unaffected + a = random.randint(0, (1 << 32) - 3) + assert 
hashes(Direct(a=a)) == hashes(Partial(a=a)) == hashes(Indirect(a=a)) + + # checking if the annotation is properly converted to output_spec if used in task + assert list_fields(Direct.Outputs) == [ + python.out(name="sum", type=int), + python.out(name="sub", type=int), + ] + + +def test_invalid_annotation(): + with pytest.raises(ValueError, match="Unrecognised input names"): + + @python.define(inputs={"b": int}) + def addtwo(a): + return a + 2 + + +def test_annotated_task(): + + @python.define + def Square(in_val: float): + return in_val**2 + + outputs = Square(in_val=2.0)() + assert outputs.out == 4.0 + + +def test_return_annotated_task(): + + @python.define(inputs={"in_val": float}, outputs={"squared": float}) + def Square(in_val): + return in_val**2 + + outputs = Square(in_val=2.0)() + assert outputs.squared == 4.0 + + +def test_return_halfannotated_annotated_task(): + + @python.define(inputs={"in_val": float}, outputs={"out": float}) + def Square(in_val): + return in_val**2 + + outputs = Square(in_val=2.0)() + assert outputs.out == 4.0 + + +def test_return_annotated_task_multiple_output(): + + @python.define(inputs={"in_val": float}, outputs={"squared": float, "cubed": float}) + def Square(in_val): + return in_val**2, in_val**3 + + outputs = Square(in_val=2.0)() + assert outputs.squared == 4.0 + assert outputs.cubed == 8.0 diff --git a/pydra/engine/tests/test_helpers.py b/pydra/engine/tests/test_helpers.py index 48fd6e3120..d8d1d0bd00 100644 --- a/pydra/engine/tests/test_helpers.py +++ b/pydra/engine/tests/test_helpers.py @@ -5,47 +5,45 @@ import platform import typing as ty import pytest -import attrs import cloudpickle as cp -from unittest.mock import Mock +from pydra.engine.submitter import Submitter +from pydra.engine.specs import Result +from pydra.engine.core import Task +from pydra.design import workflow from fileformats.generic import Directory, File -from fileformats.core import FileSet -from .utils import multiply, raise_xeq1 +from .utils import Multiply, RaiseXeq1 from ..helpers import ( get_available_cpus, save, load_and_run, position_sort, - parse_copyfile, - argstr_formatting, parse_format_string, ) -from ...utils.hash import hash_function -from ..core import Workflow +from pydra.utils.hash import hash_function def test_save(tmpdir): outdir = Path(tmpdir) with pytest.raises(ValueError): save(tmpdir) - foo = multiply(name="mult", x=1, y=2) + foo = Task(name="mult", definition=Multiply(x=1, y=2), submitter=Submitter()) # save task save(outdir, task=foo) del foo # load saved task task_pkl = outdir / "_task.pklz" - foo = cp.loads(task_pkl.read_bytes()) + foo: Task = cp.loads(task_pkl.read_bytes()) assert foo.name == "mult" - assert foo.inputs.x == 1 and foo.inputs.y == 2 + assert foo.inputs["x"] == 1 and foo.inputs["y"] == 2 # execute task and save result - res = foo() - assert res.output.out == 2 + res: Result = foo.run() + assert res.outputs.out == 2 save(outdir, result=res) del res # load saved result res_pkl = outdir / "_result.pklz" - res = cp.loads(res_pkl.read_bytes()) - assert res.output.out == 2 + res: Result = cp.loads(res_pkl.read_bytes()) + assert res.outputs.out == 2 def test_hash_file(tmpdir): @@ -180,47 +178,39 @@ def test_get_available_cpus(): def test_load_and_run(tmpdir): """testing load_and_run for pickled task""" task_pkl = Path(tmpdir.join("task_main.pkl")) - - task = multiply(name="mult", y=10).split(x=[1, 2]) - task.state.prepare_states(inputs=task.inputs) - task.state.prepare_inputs() + # Note that tasks now don't have state arrays and indices, just a 
single resolved + # set of parameters that are ready to run + task = Task(name="mult", definition=Multiply(x=2, y=10), submitter=Submitter()) with task_pkl.open("wb") as fp: cp.dump(task, fp) - - resultfile_0 = load_and_run(task_pkl=task_pkl, ind=0) - resultfile_1 = load_and_run(task_pkl=task_pkl, ind=1) + resultfile = load_and_run(task_pkl=task_pkl) # checking the result files - result_0 = cp.loads(resultfile_0.read_bytes()) - result_1 = cp.loads(resultfile_1.read_bytes()) - assert result_0.output.out == 10 - assert result_1.output.out == 20 - - -def test_load_and_run_exception_load(tmpdir): - """testing raising exception and saving info in crashfile when when load_and_run""" - task_pkl = Path(tmpdir.join("task_main.pkl")) - raise_xeq1(name="raise").split("x", x=[1, 2]) - with pytest.raises(FileNotFoundError): - load_and_run(task_pkl=task_pkl, ind=0) + result = cp.loads(resultfile.read_bytes()) + assert result.outputs.out == 20 def test_load_and_run_exception_run(tmpdir): """testing raising exception and saving info in crashfile when when load_and_run""" task_pkl = Path(tmpdir.join("task_main.pkl")) + cache_root = Path(tmpdir.join("cache")) + cache_root.mkdir() - task = raise_xeq1(name="raise").split("x", x=[1, 2]) - task.state.prepare_states(inputs=task.inputs) - task.state.prepare_inputs() + task = Task( + definition=RaiseXeq1(x=1), + name="raise", + submitter=Submitter(worker="cf", cache_dir=cache_root), + ) with task_pkl.open("wb") as fp: cp.dump(task, fp) with pytest.raises(Exception) as excinfo: - load_and_run(task_pkl=task_pkl, ind=0) - assert "i'm raising an exception!" in str(excinfo.value) + load_and_run(task_pkl=task_pkl) + exc_msg = excinfo.value.args[0] + assert "i'm raising an exception!" in exc_msg # checking if the crashfile has been created - assert "crash" in str(excinfo.value) - errorfile = Path(str(excinfo.value).split("here: ")[1][:-2]) + assert "crash" in excinfo.value.__notes__[0] + errorfile = Path(excinfo.value.__notes__[0].split("here: ")[1]) assert errorfile.exists() resultfile = errorfile.parent / "_result.pklz" @@ -229,37 +219,40 @@ def test_load_and_run_exception_run(tmpdir): result_exception = cp.loads(resultfile.read_bytes()) assert result_exception.errored is True + task = Task(definition=RaiseXeq1(x=2), name="wont_raise", submitter=Submitter()) + + with task_pkl.open("wb") as fp: + cp.dump(task, fp) + # the second task should be fine - resultfile = load_and_run(task_pkl=task_pkl, ind=1) + resultfile = load_and_run(task_pkl=task_pkl) result_1 = cp.loads(resultfile.read_bytes()) - assert result_1.output.out == 2 + assert result_1.outputs.out == 2 -def test_load_and_run_wf(tmpdir): +@pytest.mark.parametrize("worker", ["cf", "debug"]) +def test_load_and_run_wf(tmpdir, worker): """testing load_and_run for pickled task""" wf_pkl = Path(tmpdir.join("wf_main.pkl")) - wf = Workflow(name="wf", input_spec=["x", "y"], y=10) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.split("x", x=[1, 2]) - - wf.set_output([("out", wf.mult.lzout.out)]) + @workflow.define + def Workflow(x, y=10): + multiply = workflow.add(Multiply(x=x, y=y)) + return multiply.out - # task = multiply(name="mult", x=[1, 2], y=10).split("x") - wf.state.prepare_states(inputs=wf.inputs) - wf.state.prepare_inputs() - wf.plugin = "cf" + task = Task( + name="mult", + definition=Workflow(x=2), + submitter=Submitter(cache_dir=tmpdir, worker=worker), + ) with wf_pkl.open("wb") as fp: - cp.dump(wf, fp) + cp.dump(task, fp) - resultfile_0 = load_and_run(ind=0, task_pkl=wf_pkl) - resultfile_1 = 
load_and_run(ind=1, task_pkl=wf_pkl) + resultfile = load_and_run(task_pkl=wf_pkl) # checking the result files - result_0 = cp.loads(resultfile_0.read_bytes()) - result_1 = cp.loads(resultfile_1.read_bytes()) - assert result_0.output.out == 10 - assert result_1.output.out == 20 + result = cp.loads(resultfile.read_bytes()) + assert result.outputs.out == 20 @pytest.mark.parametrize( @@ -277,63 +270,6 @@ def test_position_sort(pos_args): assert final_args == ["a", "b", "c"] -def test_parse_copyfile(): - Mode = FileSet.CopyMode - Collation = FileSet.CopyCollation - - def mock_field(copyfile): - mock = Mock(["metadata"]) - mock.metadata = {"copyfile": copyfile} - return mock - - assert parse_copyfile(mock_field((Mode.any, Collation.any))) == ( - Mode.any, - Collation.any, - ) - assert parse_copyfile(mock_field("copy"), default_collation=Collation.siblings) == ( - Mode.copy, - Collation.siblings, - ) - assert parse_copyfile(mock_field("link,adjacent")) == ( - Mode.link, - Collation.adjacent, - ) - assert parse_copyfile(mock_field(True)) == ( - Mode.copy, - Collation.any, - ) - assert parse_copyfile(mock_field(False)) == ( - Mode.link, - Collation.any, - ) - assert parse_copyfile(mock_field(None)) == ( - Mode.any, - Collation.any, - ) - with pytest.raises(TypeError, match="Unrecognised type for mode copyfile"): - parse_copyfile(mock_field((1, 2))) - with pytest.raises(TypeError, match="Unrecognised type for collation copyfile"): - parse_copyfile(mock_field((Mode.copy, 2))) - - -def test_argstr_formatting(): - @attrs.define - class Inputs: - a1_field: str - b2_field: float - c3_field: ty.Dict[str, str] - d4_field: ty.List[str] - - inputs = Inputs("1", 2.0, {"c": "3"}, ["4"]) - assert ( - argstr_formatting( - "{a1_field} {b2_field:02f} -test {c3_field[c]} -me {d4_field[0]}", - inputs, - ) - == "1 2.000000 -test 3 -me 4" - ) - - def test_parse_format_string1(): assert parse_format_string("{a}") == {"a"} diff --git a/pydra/engine/tests/test_helpers_file.py b/pydra/engine/tests/test_helpers_file.py index ea5dd2afdc..f940a6f398 100644 --- a/pydra/engine/tests/test_helpers_file.py +++ b/pydra/engine/tests/test_helpers_file.py @@ -1,13 +1,14 @@ import typing as ty import sys +import os from pathlib import Path -import attr from unittest.mock import Mock import pytest from fileformats.generic import File -from ..specs import SpecInfo, ShellSpec -from ..task import ShellCommandTask -from ..helpers_file import ( +from pydra.engine.specs import ShellDef, ShellOutputs +from pydra.design import shell +from pydra.engine.helpers import list_fields +from pydra.engine.helpers_file import ( ensure_list, MountIndentifier, copy_nested_files, @@ -354,66 +355,52 @@ def test_output_template(tmp_path): filename = str(tmp_path / "file.txt") with open(filename, "w") as f: f.write("hello from pydra") - in_file = File(filename) - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "in_file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), - ), - ( - "optional", - attr.ib( - type=ty.Union[Path, bool], - default=False, - metadata={ - "position": 2, - "argstr": "--opt", - "output_file_template": "{in_file}.out", - "help_string": "optional file output", - }, - ), - ), - ], - bases=(ShellSpec,), - ) - class MyCommand(ShellCommandTask): + @shell.define + class MyCommand(ShellDef["MyCommand.Outputs"]): + in_file: File = shell.arg( + position=1, + argstr="", + help="input file", + ) + optional: File | None = shell.outarg( + position=2, + 
argstr="--opt", + path_template="{in_file}.out", + help="optional file output", + ) + + class Outputs(ShellOutputs): + pass + executable = "my" - input_spec = my_input_spec - task = MyCommand(in_file=filename) - assert task.cmdline == f"my {filename}" - task.inputs.optional = True - assert task.cmdline == f"my {filename} --opt {task.output_dir / 'file.out'}" - task.inputs.optional = False - assert task.cmdline == f"my {filename}" - task.inputs.optional = "custom-file-out.txt" - assert task.cmdline == f"my {filename} --opt custom-file-out.txt" + defn = MyCommand(in_file=filename) + assert defn.cmdline == f"my {filename}" + defn.optional = True + file_out_path = os.path.join(os.getcwd(), "file.out") + if " " in file_out_path: + file_out_path = f"'{file_out_path}'" + assert defn.cmdline == f"my {filename} --opt {file_out_path}" + defn.optional = False + assert defn.cmdline == f"my {filename}" + defn.optional = "custom-file-out.txt" + assert defn.cmdline == f"my {filename} --opt custom-file-out.txt" -def test_template_formatting(tmp_path): +def test_template_formatting(tmp_path: Path): field = Mock() field.name = "grad" field.argstr = "--grad" - field.metadata = {"output_file_template": ("{in_file}.bvec", "{in_file}.bval")} - inputs = Mock() - inputs_dict = {"in_file": "/a/b/c/file.txt", "grad": True} + field.path_template = ("{in_file}.bvec", "{in_file}.bval") + field.keep_extension = False + definition = Mock() + values = {"in_file": Path("/a/b/c/file.txt"), "grad": True} assert template_update_single( field, - inputs, - inputs_dict_st=inputs_dict, + definition, + values=values, output_dir=tmp_path, spec_type="input", - ) == [str(tmp_path / "file.bvec"), str(tmp_path / "file.bval")] + ) == [tmp_path / "file.bvec", tmp_path / "file.bval"] diff --git a/pydra/engine/tests/test_nipype1_convert.py b/pydra/engine/tests/test_nipype1_convert.py deleted file mode 100644 index 8408fddb6c..0000000000 --- a/pydra/engine/tests/test_nipype1_convert.py +++ /dev/null @@ -1,122 +0,0 @@ -import typing as ty -import pytest - - -from ..task import ShellCommandTask -from ..specs import ShellOutSpec, ShellSpec, SpecInfo, File - -interf_input_spec = SpecInfo( - name="Input", fields=[("test", ty.Any, {"help_string": "test"})], bases=(ShellSpec,) -) -interf_output_spec = SpecInfo( - name="Output", fields=[("test_out", File, "*.txt")], bases=(ShellOutSpec,) -) - - -class Interf_1(ShellCommandTask): - """class with customized input/output specs""" - - input_spec = interf_input_spec - output_spec = interf_output_spec - - -class Interf_2(ShellCommandTask): - """class with customized input/output specs and executables""" - - input_spec = interf_input_spec - output_spec = interf_output_spec - executable = "testing command" - - -class Interf_3(ShellCommandTask): - """class with customized input and executables""" - - input_spec = SpecInfo( - name="Input", - fields=[ - ( - "in_file", - str, - {"help_string": "in_file", "argstr": "'{in_file}'"}, - ) - ], - bases=(ShellSpec,), - ) - executable = "testing command" - - -class TouchInterf(ShellCommandTask): - """class with customized input and executables""" - - input_spec = SpecInfo( - name="Input", - fields=[ - ( - "new_file", - str, - { - "help_string": "new_file", - "argstr": "", - "output_file_template": "{new_file}", - }, - ) - ], - bases=(ShellSpec,), - ) - executable = "touch" - - -def test_interface_specs_1(): - """testing if class input/output spec are set properly""" - task = Interf_1(executable="ls") - assert task.input_spec == interf_input_spec - assert 
task.output_spec == interf_output_spec - - -def test_interface_specs_2(): - """testing if class input/output spec are overwritten properly by the user's specs""" - my_input_spec = SpecInfo( - name="Input", - fields=[("my_inp", ty.Any, {"help_string": "my inp"})], - bases=(ShellSpec,), - ) - my_output_spec = SpecInfo( - name="Output", fields=[("my_out", File, "*.txt")], bases=(ShellOutSpec,) - ) - task = Interf_1(input_spec=my_input_spec, output_spec=my_output_spec) - assert task.input_spec == my_input_spec - assert task.output_spec == my_output_spec - - -def test_interface_executable_1(): - """testing if the class executable is properly set and used in the command line""" - task = Interf_2() - assert task.executable == "testing command" - assert task.inputs.executable == "testing command" - assert task.cmdline == "testing command" - - -def test_interface_executable_2(): - """testing if the class executable is overwritten by the user's input (and if the warning is raised)""" - # warning that the user changes the executable from the one that is set as a class attribute - with pytest.warns(UserWarning, match="changing the executable"): - task = Interf_2(executable="i want a different command") - assert task.executable == "testing command" - # task.executable stays the same, but input.executable is changed, so the cmd is changed - assert task.inputs.executable == "i want a different command" - assert task.cmdline == "i want a different command" - - -def test_interface_cmdline_with_spaces(): - task = Interf_3(in_file="/path/to/file/with spaces") - assert task.executable == "testing command" - assert task.inputs.executable == "testing command" - assert task.cmdline == "testing command '/path/to/file/with spaces'" - - -def test_interface_run_1(): - """testing execution of a simple interf with customized input and executable""" - task = TouchInterf(new_file="hello.txt") - assert task.cmdline == "touch hello.txt" - res = task() - assert res.output.new_file.fspath.exists() diff --git a/pydra/engine/tests/test_node_task.py b/pydra/engine/tests/test_node_task.py index bceaf97402..dd144e1672 100644 --- a/pydra/engine/tests/test_node_task.py +++ b/pydra/engine/tests/test_node_task.py @@ -1,32 +1,59 @@ import os import shutil -import attr +import attrs import typing as ty import numpy as np import time -from unittest import mock from pathlib import Path import pytest -import time from fileformats.generic import File -import pydra.mark +from pydra.design import python, workflow from .utils import ( - fun_addtwo, - fun_addvar, - fun_addvar_none, - fun_addvar_default, - moment, - fun_div, - fun_dict, - fun_file, - fun_file_list, - op_4var, + FunAddTwo, + FunAddVar, + FunAddVarNone, + FunAddVarDefault, + Moment, + FunDiv, + FunDict, + FunFile, + FunFileList, + Op4Var, ) +from pydra.engine.specs import TaskDef +from pydra.engine.state import State +from pydra.utils.typing import StateArray +from pydra.engine.submitter import Submitter +from pydra.engine.core import Workflow +from pydra.engine.helpers import attrs_values + + +@workflow.define +def IdentityWorkflow(a: int) -> int: + + @python.define + def Identity(a): + return a + + a = workflow.add(Identity(a=a)) + return a.out -from ..core import TaskBase -from ..specs import StateArray -from ..submitter import Submitter + +def get_state(task: TaskDef, name="NA") -> State: + """helper function to get the state of the task once it has been added to workflow""" + identity_workflow = IdentityWorkflow(a=1) + wf = Workflow.construct(identity_workflow, 
dont_cache=True) + wf.add(task, name=name) + node = wf[name] + if node.state: + node.state.prepare_states(node.state_values) + node.state.prepare_inputs() + return node.state + + +def num_python_cache_dirs(cache_path: Path) -> int: + return len(list(cache_path.glob("python-*"))) @pytest.fixture(scope="module") @@ -42,26 +69,18 @@ def move2orig(): request.addfinalizer(move2orig) -# Tests for tasks initializations -def test_task_init_1(): - """task with mandatory arguments only""" - nn = fun_addtwo() - assert isinstance(nn, TaskBase) - assert nn.name == "fun_addtwo" - assert hasattr(nn, "__call__") - - def test_task_init_1a(): with pytest.raises(TypeError): - fun_addtwo("NA") + FunAddTwo("NA") def test_task_init_2(): """task with a name and inputs""" - nn = fun_addtwo(name="NA", a=3) + nn = FunAddTwo(a=3) # adding NA to the name of the variable - assert getattr(nn.inputs, "a") == 3 - assert nn.state is None + assert nn.a == 3 + state = get_state(nn) + assert state is None @pytest.mark.parametrize( @@ -77,15 +96,15 @@ def test_task_init_3( if input_type == "array": a_in = np.array(a_in) - nn = fun_addtwo(name="NA").split(splitter=splitter, a=a_in) + nn = FunAddTwo().split(splitter, a=a_in) - assert np.allclose(nn.inputs.a, [3, 5]) - assert nn.state.splitter == state_splitter - assert nn.state.splitter_rpn == state_rpn + assert np.allclose(nn.a, [3, 5]) + state = get_state(nn) + assert state.splitter == state_splitter + assert state.splitter_rpn == state_rpn - nn.state.prepare_states(nn.inputs) - assert nn.state.states_ind == states_ind - assert nn.state.states_val == states_val + assert state.states_ind == states_ind + assert state.states_val == states_val @pytest.mark.parametrize( @@ -127,176 +146,164 @@ def test_task_init_3a( a_in, b_in = np.array(a_in), np.array(b_in) elif input_type == "mixed": a_in = np.array(a_in) - nn = fun_addvar(name="NA").split(splitter=splitter, a=a_in, b=b_in) + nn = FunAddVar().split(splitter, a=a_in, b=b_in) + state = get_state(nn) - assert np.allclose(nn.inputs.a, [3, 5]) - assert np.allclose(nn.inputs.b, [10, 20]) - assert nn.state.splitter == state_splitter - assert nn.state.splitter_rpn == state_rpn + assert np.allclose(nn.a, [3, 5]) + assert np.allclose(nn.b, [10, 20]) + assert state.splitter == state_splitter + assert state.splitter_rpn == state_rpn - nn.state.prepare_states(nn.inputs) - assert nn.state.states_ind == states_ind - assert nn.state.states_val == states_val + assert state.states_ind == states_ind + assert state.states_val == states_val def test_task_init_4(): """task with interface splitter and inputs set in the split method""" - nn = fun_addtwo(name="NA") - nn.split(splitter="a", a=[3, 5]) - assert np.allclose(nn.inputs.a, [3, 5]) + nn = FunAddTwo() + nn = nn.split("a", a=[3, 5]) + state = get_state(nn) + assert np.allclose(nn.a, [3, 5]) - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] + assert state.splitter == "NA.a" + assert state.splitter_rpn == ["NA.a"] - nn.state.prepare_states(nn.inputs) - assert nn.state.states_ind == [{"NA.a": 0}, {"NA.a": 1}] - assert nn.state.states_val == [{"NA.a": 3}, {"NA.a": 5}] + assert state.states_ind == [{"NA.a": 0}, {"NA.a": 1}] + assert state.states_val == [{"NA.a": 3}, {"NA.a": 5}] def test_task_init_4b(): """updating splitter using overwrite=True""" - nn = fun_addtwo(name="NA") - nn.split(splitter="a", a=[1, 2]) - nn.split(splitter="a", a=[3, 5], overwrite=True) - assert np.allclose(nn.inputs.a, [3, 5]) + nn = FunAddTwo() + nn = nn.split("a", a=[1, 2]) + nn = nn.split("a", 
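[Editor's aside, not part of the patch: under the new design, tasks are plain attrs-style definitions, so inputs are ordinary attributes rather than living on .inputs, unset fields are attrs.NOTHING, calling a definition runs it and returns its outputs record, and content hashing is exposed via _hash. A minimal sketch of that pattern, assuming the @python.define behaviour exercised by the tests in this file; the AddTwo name is made up for illustration.]

    import attrs
    from pydra.design import python

    @python.define
    def AddTwo(a: int) -> int:
        return a + 2

    nn = AddTwo()
    assert nn.a is attrs.NOTHING   # nothing set yet
    nn = AddTwo(a=3)
    assert nn.a == 3               # input is a plain attribute, not nn.inputs.a
    outputs = nn()                 # direct call executes the task and returns its outputs
    assert outputs.out == 5        # a single unnamed return value is exposed as "out"
    # definitions hash by content, so AddTwo(a=3)._hash == AddTwo(a=3)._hash

[End of aside; the diff continues below.]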
a=[3, 5], overwrite=True) + state = get_state(nn) + assert np.allclose(nn.a, [3, 5]) - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] + assert state.splitter == "NA.a" + assert state.splitter_rpn == ["NA.a"] - nn.state.prepare_states(nn.inputs) - assert nn.state.states_ind == [{"NA.a": 0}, {"NA.a": 1}] - assert nn.state.states_val == [{"NA.a": 3}, {"NA.a": 5}] + assert state.states_ind == [{"NA.a": 0}, {"NA.a": 1}] + assert state.states_val == [{"NA.a": 3}, {"NA.a": 5}] def test_task_init_4c(): """trying to set splitter twice without using overwrite""" - nn = fun_addvar(name="NA").split(splitter="b", b=[1, 2]) + nn = FunAddVar().split("b", b=[1, 2]) + state = get_state(nn) with pytest.raises(Exception) as excinfo: - nn.split(splitter="a", a=[3, 5]) - assert "splitter has been already set" in str(excinfo.value) + nn.split("a", a=[3, 5]) + assert "Cannot overwrite existing splitter" in str(excinfo.value) - assert nn.state.splitter == "NA.b" + assert state.splitter == "NA.b" def test_task_init_4d(): """trying to set the same splitter twice without using overwrite if the splitter is the same, the exception shouldn't be raised """ - nn = fun_addtwo(name="NA").split(splitter="a", a=[3, 5]) - nn.split(splitter="a", a=[3, 5]) - assert nn.state.splitter == "NA.a" + nn = FunAddTwo().split("a", a=[3, 5]) + nn = nn.split("a", a=[3, 5], overwrite=True) + state = get_state(nn) + assert state.splitter == "NA.a" def test_task_init_5(): """task with inputs, splitter and combiner""" - nn = ( - fun_addvar(name="NA") - .split(splitter=["a", "b"], a=[3, 5], b=[1, 2]) - .combine("b") - ) + nn = FunAddVar().split(["a", "b"], a=[3, 5], b=[1, 2]).combine("b") + state = get_state(nn) - assert nn.state.splitter == ["NA.a", "NA.b"] - assert nn.state.splitter_rpn == ["NA.a", "NA.b", "*"] - assert nn.state.combiner == ["NA.b"] + assert state.splitter == ["NA.a", "NA.b"] + assert state.splitter_rpn == ["NA.a", "NA.b", "*"] + assert state.combiner == ["NA.b"] - assert nn.state.splitter_final == "NA.a" - assert nn.state.splitter_rpn_final == ["NA.a"] + assert state.splitter_final == "NA.a" + assert state.splitter_rpn_final == ["NA.a"] - nn.state.prepare_states(nn.inputs) - assert nn.state.states_ind == [ + assert state.states_ind == [ {"NA.a": 0, "NA.b": 0}, {"NA.a": 0, "NA.b": 1}, {"NA.a": 1, "NA.b": 0}, {"NA.a": 1, "NA.b": 1}, ] - assert nn.state.states_val == [ + assert state.states_val == [ {"NA.a": 3, "NA.b": 1}, {"NA.a": 3, "NA.b": 2}, {"NA.a": 5, "NA.b": 1}, {"NA.a": 5, "NA.b": 2}, ] - assert nn.state.final_combined_ind_mapping == {0: [0, 1], 1: [2, 3]} + assert state.final_combined_ind_mapping == {0: [0, 1], 1: [2, 3]} def test_task_init_5a(): """updating combiner using overwrite=True""" - nn = ( - fun_addvar(name="NA") - .split(splitter=["a", "b"], a=[3, 5], b=[1, 2]) - .combine("b") - ) - nn.combine("a", overwrite=True) + nn = FunAddVar().split(["a", "b"], a=[3, 5], b=[1, 2]).combine("b") + nn = nn.combine("a", overwrite=True) + state = get_state(nn) - assert nn.state.splitter == ["NA.a", "NA.b"] - assert nn.state.splitter_rpn == ["NA.a", "NA.b", "*"] - assert nn.state.combiner == ["NA.a"] + assert state.splitter == ["NA.a", "NA.b"] + assert state.splitter_rpn == ["NA.a", "NA.b", "*"] + assert state.combiner == ["NA.a"] - assert nn.state.splitter_final == "NA.b" - assert nn.state.splitter_rpn_final == ["NA.b"] + assert state.splitter_final == "NA.b" + assert state.splitter_rpn_final == ["NA.b"] - nn.state.prepare_states(nn.inputs) - assert nn.state.states_ind == [ + assert 
state.states_ind == [ {"NA.a": 0, "NA.b": 0}, {"NA.a": 0, "NA.b": 1}, {"NA.a": 1, "NA.b": 0}, {"NA.a": 1, "NA.b": 1}, ] - assert nn.state.states_val == [ + assert state.states_val == [ {"NA.a": 3, "NA.b": 1}, {"NA.a": 3, "NA.b": 2}, {"NA.a": 5, "NA.b": 1}, {"NA.a": 5, "NA.b": 2}, ] - assert nn.state.final_combined_ind_mapping == {0: [0, 2], 1: [1, 3]} + assert state.final_combined_ind_mapping == {0: [0, 2], 1: [1, 3]} def test_task_init_5b(): """updating combiner without using overwrite""" - nn = ( - fun_addvar(name="NA") - .split(splitter=["a", "b"], a=[3, 5], b=[1, 2]) - .combine("b") - ) + nn = FunAddVar().split(["a", "b"], a=[3, 5], b=[1, 2]).combine("b") + state = get_state(nn) with pytest.raises(Exception) as excinfo: nn.combine("a") - assert "combiner has been already set" in str(excinfo.value) + assert "Attempting to overwrite existing combiner" in str(excinfo.value) - assert nn.state.combiner == ["NA.b"] + assert state.combiner == ["NA.b"] def test_task_init_5c(): """trying to set the same combiner twice without using overwrite if the combiner is the same, the exception shouldn't be raised """ - nn = ( - fun_addvar(name="NA") - .split(splitter=["a", "b"], a=[3, 5], b=[1, 2]) - .combine("b") - ) - nn.combine("b") + nn = FunAddVar().split(["a", "b"], a=[3, 5], b=[1, 2]).combine("b") + state = get_state(nn) + nn = nn.combine("b", overwrite=True) - assert nn.state.splitter == ["NA.a", "NA.b"] - assert nn.state.splitter_rpn == ["NA.a", "NA.b", "*"] - assert nn.state.combiner == ["NA.b"] + assert state.splitter == ["NA.a", "NA.b"] + assert state.splitter_rpn == ["NA.a", "NA.b", "*"] + assert state.combiner == ["NA.b"] - assert nn.state.splitter_final == "NA.a" - assert nn.state.splitter_rpn_final == ["NA.a"] + assert state.splitter_final == "NA.a" + assert state.splitter_rpn_final == ["NA.a"] def test_task_init_6(): """task with splitter, but the input is an empty list""" - nn = fun_addtwo(name="NA") - nn.split(splitter="a", a=[]) - assert nn.inputs.a == [] + nn = FunAddTwo() + nn = nn.split("a", a=[]) + state = get_state(nn) + assert nn.a == [] - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] + assert state.splitter == "NA.a" + assert state.splitter_rpn == ["NA.a"] - nn.state.prepare_states(nn.inputs) - assert nn.state.states_ind == [] - assert nn.state.states_val == [] + assert state.states_ind == [] + assert state.states_val == [] def test_task_init_7(tmp_path): @@ -309,8 +316,8 @@ def test_task_init_7(tmp_path): with open(file2, "w") as f: f.write("from pydra\n") - nn1 = fun_file_list(name="NA", filename_list=[file1, file2]) - output_dir1 = nn1.output_dir + nn1 = FunFileList(filename_list=[file1, file2]) + hash1 = nn1._hash # changing the content of the file time.sleep(2) # need the mtime to be different @@ -318,43 +325,35 @@ def test_task_init_7(tmp_path): with open(file2, "w") as f: f.write("from pydra") - nn2 = fun_file_list(name="NA", filename_list=[file1, file2]) - output_dir2 = nn2.output_dir + nn2 = FunFileList(filename_list=[file1, file2]) + hash2 = nn2._hash # the checksum should be different - content of file2 is different - assert output_dir1.name != output_dir2.name + assert hash1 != hash2 def test_task_init_8(): - """task without setting the input, the value should be set to attr.NOTHING""" - nn = fun_addtwo(name="NA") - assert nn.inputs.a is attr.NOTHING + """task without setting the input, the value should be set to attrs.NOTHING""" + nn = FunAddTwo() + assert nn.a is attrs.NOTHING def test_task_init_9(): """task without setting the 
input, but using the default avlue from function""" - nn1 = fun_addvar_default(name="NA", a=2) - assert nn1.inputs.b == 1 + nn1 = FunAddVarDefault(a=2) + assert nn1.b == 1 - nn2 = fun_addvar_default(name="NA", a=2, b=1) - assert nn2.inputs.b == 1 + nn2 = FunAddVarDefault(a=2, b=1) + assert nn2.b == 1 # both tasks should have the same checksum - assert nn1.checksum == nn2.checksum + assert nn1._hash == nn2._hash -def test_task_error(): - func = fun_div(name="div", a=1, b=0) +def test_task_error(tmp_path): + func = FunDiv(a=1, b=0) with pytest.raises(ZeroDivisionError): - func() - assert (func.output_dir / "_error.pklz").exists() - - -def test_odir_init(): - """checking if output_dir is available for a task without init - before running the task - """ - nn = fun_addtwo(name="NA", a=3) - assert nn.output_dir + func(cache_dir=tmp_path) + assert (next(tmp_path.iterdir()) / "_error.pklz").exists() # Tests for tasks without state (i.e. no splitter) @@ -363,124 +362,111 @@ def test_odir_init(): @pytest.mark.flaky(reruns=2) # when dask def test_task_nostate_1(plugin_dask_opt, tmp_path): """task without splitter""" - nn = fun_addtwo(name="NA", a=3) - nn.cache_dir = tmp_path - assert np.allclose(nn.inputs.a, [3]) - assert nn.state is None + nn = FunAddTwo(a=3) - with Submitter(plugin=plugin_dask_opt) as sub: - sub(nn) + assert np.allclose(nn.a, [3]) + state = get_state(nn) + assert state is None + + with Submitter(worker=plugin_dask_opt, cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - results = nn.result() - assert results.output.out == 5 - # checking the return_inputs option, either is return_inputs is True, or "val", - # it should give values of inputs that corresponds to the specific element - results_verb = nn.result(return_inputs=True) - results_verb_val = nn.result(return_inputs="val") - assert results_verb[0] == results_verb_val[0] == {"NA.a": 3} - assert results_verb[1].output.out == results_verb_val[1].output.out == 5 - # checking the return_inputs option return_inputs="ind" - # it should give indices of inputs (instead of values) for each element - results_verb_ind = nn.result(return_inputs="ind") - assert results_verb_ind[0] == {"NA.a": None} - assert results_verb_ind[1].output.out == 5 + assert results.outputs.out == 5 # checking the output_dir - assert nn.output_dir.exists() + assert results.output_dir.exists() -def test_task_nostate_1_call(): +def test_task_nostate_1_call(tmp_path): """task without splitter""" - nn = fun_addtwo(name="NA", a=3) - nn() + nn = FunAddTwo(a=3) + with Submitter(cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - results = nn.result() - assert results.output.out == 5 + + assert results.outputs.out == 5 # checking the output_dir - assert nn.output_dir.exists() + assert results.output_dir.exists() @pytest.mark.flaky(reruns=2) # when dask def test_task_nostate_1_call_subm(plugin_dask_opt, tmp_path): """task without splitter""" - nn = fun_addtwo(name="NA", a=3) - nn.cache_dir = tmp_path - assert np.allclose(nn.inputs.a, [3]) - assert nn.state is None + nn = FunAddTwo(a=3) - with Submitter(plugin=plugin_dask_opt) as sub: - nn(submitter=sub) + assert np.allclose(nn.a, [3]) + state = get_state(nn) + assert state is None + + with Submitter(worker=plugin_dask_opt, cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error 
message"]) # checking the results - results = nn.result() - assert results.output.out == 5 + + assert results.outputs.out == 5 # checking the output_dir - assert nn.output_dir.exists() + assert results.output_dir.exists() @pytest.mark.flaky(reruns=2) # when dask def test_task_nostate_1_call_plug(plugin_dask_opt, tmp_path): """task without splitter""" - nn = fun_addtwo(name="NA", a=3) - nn.cache_dir = tmp_path - assert np.allclose(nn.inputs.a, [3]) - assert nn.state is None - - nn(plugin=plugin_dask_opt) - - # checking the results - results = nn.result() - assert results.output.out == 5 - # checking the output_dir - assert nn.output_dir.exists() + nn = FunAddTwo(a=3) + assert np.allclose(nn.a, [3]) + state = get_state(nn) + assert state is None -def test_task_nostate_1_call_updateinp(): - """task without splitter""" - nn = fun_addtwo(name="NA", a=30) - # updating input when calling the node - nn(a=3) + with Submitter(cache_dir=tmp_path, worker=plugin_dask_opt) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - results = nn.result() - assert results.output.out == 5 + + assert results.outputs.out == 5 # checking the output_dir - assert nn.output_dir.exists() + assert results.output_dir.exists() def test_task_nostate_2(plugin, tmp_path): """task with a list as an input, but no splitter""" - nn = moment(name="NA", n=3, lst=[2, 3, 4]) - nn.cache_dir = tmp_path - assert np.allclose(nn.inputs.n, [3]) - assert np.allclose(nn.inputs.lst, [2, 3, 4]) - assert nn.state is None + nn = Moment(n=3, lst=[2, 3, 4]) - with Submitter(plugin=plugin) as sub: - sub(nn) + assert np.allclose(nn.n, [3]) + assert np.allclose(nn.lst, [2, 3, 4]) + state = get_state(nn) + assert state is None + + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - results = nn.result() - assert results.output.out == 33 + + assert results.outputs.out == 33 # checking the output_dir - assert nn.output_dir.exists() + assert results.output_dir.exists() def test_task_nostate_3(plugin, tmp_path): """task with a dictionary as an input""" - nn = fun_dict(name="NA", d={"a": "ala", "b": "bala"}) - nn.cache_dir = tmp_path - assert nn.inputs.d == {"a": "ala", "b": "bala"} + nn = FunDict(d={"a": "ala", "b": "bala"}) - with Submitter(plugin=plugin) as sub: - sub(nn) + assert nn.d == {"a": "ala", "b": "bala"} + + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - results = nn.result() - assert results.output.out == "a:ala_b:bala" + + assert results.outputs.out == "a:ala_b:bala" # checking the output_dir - assert nn.output_dir.exists() + assert results.output_dir.exists() def test_task_nostate_4(plugin, tmp_path): @@ -489,17 +475,17 @@ def test_task_nostate_4(plugin, tmp_path): with open(file1, "w") as f: f.write("hello from pydra\n") - nn = fun_file(name="NA", filename=file1) - nn.cache_dir = tmp_path + nn = FunFile(filename=file1) - with Submitter(plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - results = nn.result() - assert results.output.out == "hello from pydra\n" + + assert results.outputs.out == "hello from pydra\n" # checking the output_dir - assert nn.output_dir.exists() 
+ assert results.output_dir.exists() def test_task_nostate_5(tmp_path): @@ -512,40 +498,38 @@ def test_task_nostate_5(tmp_path): with open(file2, "w") as f: f.write("from pydra\n") - nn = fun_file_list(name="NA", filename_list=[file1, file2]) + nn = FunFileList(filename_list=[file1, file2]) - nn() + outputs = nn() # checking the results - results = nn.result() - assert results.output.out == "hello from pydra\n" - # checking the output_dir - assert nn.output_dir.exists() + + assert outputs.out == "hello from pydra\n" def test_task_nostate_6(): """checking if the function gets the None value""" - nn = fun_addvar_none(name="NA", a=2, b=None) - assert nn.inputs.b is None - nn() - assert nn.result().output.out == 2 + nn = FunAddVarNone(a=2, b=None) + assert nn.b is None + outputs = nn() + assert outputs.out == 2 def test_task_nostate_6a_exception(): - """checking if the function gets the attr.Nothing value""" - nn = fun_addvar_none(name="NA", a=2) - assert nn.inputs.b is attr.NOTHING - with pytest.raises(TypeError) as excinfo: + """checking if the function gets the attrs.Nothing value""" + nn = FunAddVarNone(a=2) + assert nn.b is attrs.NOTHING + with pytest.raises(ValueError) as excinfo: nn() - assert "unsupported" in str(excinfo.value) + assert "Mandatory field 'b' is not set" in str(excinfo.value) def test_task_nostate_7(): """using the default value from the function for b input""" - nn = fun_addvar_default(name="NA", a=2) - assert nn.inputs.b == 1 - nn() - assert nn.result().output.out == 3 + nn = FunAddVarDefault(a=2) + assert nn.b == 1 + outputs = nn() + assert outputs.out == 3 # Testing caching for tasks without states @@ -556,16 +540,18 @@ def test_task_nostate_cachedir(plugin_dask_opt, tmp_path): """task with provided cache_dir using pytest tmp_path""" cache_dir = tmp_path / "test_task_nostate" cache_dir.mkdir() - nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) - assert np.allclose(nn.inputs.a, [3]) - assert nn.state is None + nn = FunAddTwo(a=3) + state = get_state(nn) + assert np.allclose(nn.a, [3]) + assert state is None - with Submitter(plugin=plugin_dask_opt) as sub: - sub(nn) + with Submitter(worker=plugin_dask_opt, cache_dir=cache_dir) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - results = nn.result() - assert results.output.out == 5 + + assert results.outputs.out == 5 @pytest.mark.flaky(reruns=2) # when dask @@ -575,16 +561,18 @@ def test_task_nostate_cachedir_relativepath(tmp_path, plugin_dask_opt): cache_dir = "test_task_nostate" (tmp_path / cache_dir).mkdir() - nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) - assert np.allclose(nn.inputs.a, [3]) - assert nn.state is None + nn = FunAddTwo(a=3) + assert np.allclose(nn.a, [3]) + state = get_state(nn) + assert state is None - with Submitter(plugin=plugin_dask_opt) as sub: - sub(nn) + with Submitter(worker=plugin_dask_opt, cache_dir=cache_dir) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - results = nn.result() - assert results.output.out == 5 + + assert results.outputs.out == 5 shutil.rmtree(cache_dir) @@ -600,21 +588,24 @@ def test_task_nostate_cachelocations(plugin_dask_opt, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) - with Submitter(plugin=plugin_dask_opt) as sub: - sub(nn) + nn = FunAddTwo(a=3) + with Submitter(worker=plugin_dask_opt, cache_dir=cache_dir) as sub: + 
results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) - nn2 = fun_addtwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) - with Submitter(plugin=plugin_dask_opt) as sub: - sub(nn2) + nn2 = FunAddTwo(a=3) + with Submitter( + worker=plugin_dask_opt, cache_dir=cache_dir2, cache_locations=cache_dir + ) as sub: + results2 = sub(nn2) + assert not results2.errored, "\n".join(results.errors["error message"]) # checking the results - results2 = nn2.result() - assert results2.output.out == 5 + + assert results2.outputs.out == 5 # checking if the second task didn't run the interface again - assert nn.output_dir.exists() - assert not nn2.output_dir.exists() + assert results.output_dir == results2.output_dir def test_task_nostate_cachelocations_forcererun(plugin, tmp_path): @@ -628,21 +619,24 @@ def test_task_nostate_cachelocations_forcererun(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) - with Submitter(plugin=plugin) as sub: - sub(nn) + nn = FunAddTwo(a=3) + with Submitter(worker=plugin, cache_dir=cache_dir) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) - nn2 = fun_addtwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) - with Submitter(plugin=plugin) as sub: - sub(nn2, rerun=True) + nn2 = FunAddTwo(a=3) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir + ) as sub: + results2 = sub(nn2, rerun=True) # checking the results - results2 = nn2.result() - assert results2.output.out == 5 + + assert results2.outputs.out == 5 # checking if the second task rerun the interface - assert nn.output_dir.exists() - assert nn2.output_dir.exists() + assert results.output_dir.exists() + assert results2.output_dir.exists() def test_task_nostate_cachelocations_nosubmitter(tmp_path): @@ -655,19 +649,19 @@ def test_task_nostate_cachelocations_nosubmitter(tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) - nn() + nn = FunAddTwo(a=3) + nn(cache_dir=cache_dir) - nn2 = fun_addtwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) - nn2() + nn2 = FunAddTwo(a=3) + outputs2 = nn2(cache_dir=cache_dir2, cache_locations=cache_dir) # checking the results - results2 = nn2.result() - assert results2.output.out == 5 + + assert outputs2.out == 5 # checking if the second task didn't run the interface again - assert nn.output_dir.exists() - assert not nn2.output_dir.exists() + assert num_python_cache_dirs(cache_dir) == 1 + assert not num_python_cache_dirs(cache_dir2) def test_task_nostate_cachelocations_nosubmitter_forcererun(tmp_path): @@ -681,19 +675,19 @@ def test_task_nostate_cachelocations_nosubmitter_forcererun(tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) - nn() + nn = FunAddTwo(a=3) + nn(cache_dir=cache_dir) - nn2 = fun_addtwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) - nn2(rerun=True) + nn2 = FunAddTwo(a=3) + outputs2 = nn2(rerun=True, cache_dir=cache_dir2, cache_locations=cache_dir) # checking the results - results2 = nn2.result() - assert results2.output.out == 5 + + assert outputs2.out == 5 # checking if the second task run the interface again - assert nn.output_dir.exists() - assert nn2.output_dir.exists() + assert num_python_cache_dirs(cache_dir) == 1 + assert 
num_python_cache_dirs(cache_dir2) def test_task_nostate_cachelocations_updated(plugin, tmp_path): @@ -710,22 +704,32 @@ def test_task_nostate_cachelocations_updated(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) - with Submitter(plugin=plugin) as sub: - sub(nn) + nn = FunAddTwo(a=3) + with Submitter(worker=plugin, cache_dir=cache_dir) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) + + nn2 = FunAddTwo(a=3) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir + ) as sub: + results1 = sub(nn2) + assert not results1.errored, "\n".join(results.errors["error message"]) - nn2 = fun_addtwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) # updating cache location to non-existing dir - with Submitter(plugin=plugin) as sub: - sub(nn2, cache_locations=cache_dir1) + with Submitter( + worker=plugin, cache_locations=cache_dir1, cache_dir=tmp_path + ) as sub: + results2 = sub(nn2) + assert not results2.errored, "\n".join(results.errors["error message"]) # checking the results - results2 = nn2.result() - assert results2.output.out == 5 + + assert results2.outputs.out == 5 # checking if both tasks run interface - assert nn.output_dir.exists() - assert nn2.output_dir.exists() + assert results.output_dir == results1.output_dir + assert results.output_dir != results2.output_dir # Tests for tasks with states (i.e. with splitter) @@ -739,90 +743,72 @@ def test_task_state_1(plugin_dask_opt, input_type, tmp_path): if input_type == "array": a_in = np.array(a_in) - nn = fun_addtwo(name="NA").split(splitter="a", a=a_in) - nn.cache_dir = tmp_path + nn = FunAddTwo().split("a", a=a_in) + state = get_state(nn) - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] - assert (nn.inputs.a == np.array([3, 5])).all() + assert state.splitter == "NA.a" + assert state.splitter_rpn == ["NA.a"] + assert (nn.a == np.array([3, 5])).all() - with Submitter(plugin=plugin_dask_opt) as sub: - sub(nn) + with Submitter(worker=plugin_dask_opt, cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - results = nn.result() - expected = [({"NA.a": 3}, 5), ({"NA.a": 5}, 7)] - for i, res in enumerate(expected): - assert results[i].output.out == res[1] - # checking the return_inputs option, either return_inputs is True or "val", - # it should give values of inputs that corresponds to the specific element - results_verb = nn.result(return_inputs=True) - results_verb_val = nn.result(return_inputs="val") + expected = [({"NA.a": 3}, 5), ({"NA.a": 5}, 7)] for i, res in enumerate(expected): - assert (results_verb[i][0], results_verb[i][1].output.out) == res - assert (results_verb_val[i][0], results_verb_val[i][1].output.out) == res - - # checking the return_inputs option return_inputs="ind" - # it should give indices of inputs (instead of values) for each element - results_verb_ind = nn.result(return_inputs="ind") - expected_ind = [({"NA.a": 0}, 5), ({"NA.a": 1}, 7)] - for i, res in enumerate(expected_ind): - assert (results_verb_ind[i][0], results_verb_ind[i][1].output.out) == res - - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert results.outputs.out[i] == res[1] def test_task_state_1a(plugin, tmp_path): """task with the simplest splitter (inputs set separately)""" - nn = 
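[Editor's aside, not part of the patch: the cache-reuse behaviour asserted by the cachelocations tests above, condensed. cache_dir and cache_locations are now passed at execution time rather than stored on the task, and — per the num_python_cache_dirs helper defined earlier in this file — each executed python task leaves a "python-*" directory in its cache. That directory-name pattern is taken from the helper, not verified independently.]

    import tempfile
    from pathlib import Path
    from pydra.engine.tests.utils import FunAddTwo  # assumed location of the test helper

    cache_dir = Path(tempfile.mkdtemp())
    cache_dir2 = Path(tempfile.mkdtemp())

    FunAddTwo(a=3)(cache_dir=cache_dir)      # first run populates cache_dir
    outputs2 = FunAddTwo(a=3)(                # second run finds the cached result ...
        cache_dir=cache_dir2, cache_locations=cache_dir
    )
    assert outputs2.out == 5
    # ... so only the first cache holds a python-* entry
    assert len(list(cache_dir.glob("python-*"))) == 1
    assert len(list(cache_dir2.glob("python-*"))) == 0

[End of aside; the diff continues below.]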
fun_addtwo(name="NA") - nn.split(splitter="a", a=[1, 2]) - nn.inputs.a = StateArray([3, 5]) - nn.cache_dir = tmp_path + nn = FunAddTwo() + nn = nn.split("a", a=[1, 2]) + nn.a = StateArray([3, 5]) - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] - assert (nn.inputs.a == np.array([3, 5])).all() + state = get_state(nn) - with Submitter(plugin=plugin) as sub: - sub(nn) + assert state.splitter == "NA.a" + assert state.splitter_rpn == ["NA.a"] + assert (nn.a == np.array([3, 5])).all() + + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - results = nn.result() + expected = [({"NA.a": 3}, 5), ({"NA.a": 5}, 7)] for i, res in enumerate(expected): - assert results[i].output.out == res[1] + assert results.outputs.out[i] == res[1] def test_task_state_singl_1(plugin, tmp_path): """Tasks with two inputs and a splitter (no combiner) one input is a single value, the other is in the splitter and combiner """ - nn = fun_addvar(name="NA").split(splitter="a", a=[3, 5], b=10) - nn.cache_dir = tmp_path + nn = FunAddVar(b=10).split("a", a=[3, 5]) + state = get_state(nn) - assert nn.inputs.a == [3, 5] - assert nn.inputs.b == 10 - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] - assert nn.state.splitter_final == "NA.a" - assert nn.state.splitter_rpn_final == ["NA.a"] + assert nn.a == [3, 5] + assert nn.b == 10 + assert state.splitter == "NA.a" + assert state.splitter_rpn == ["NA.a"] + assert state.splitter_final == "NA.a" + assert state.splitter_rpn_final == ["NA.a"] - with Submitter(plugin=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results expected = [({"NA.a": 3, "NA.b": 10}, 13), ({"NA.a": 5, "NA.b": 10}, 15)] - results = nn.result() + for i, res in enumerate(expected): - assert results[i].output.out == res[1] + assert results.outputs.out[i] == res[1] # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert results.output_dir.exists() @pytest.mark.parametrize( @@ -871,63 +857,44 @@ def test_task_state_2( a_in, b_in = np.array(a_in), np.array(b_in) elif input_type == "mixed": a_in = np.array(a_in) - nn = fun_addvar(name="NA").split(splitter=splitter, a=a_in, b=b_in) - nn.cache_dir = tmp_path + nn = FunAddVar().split(splitter, a=a_in, b=b_in) + state = get_state(nn) - assert (nn.inputs.a == np.array([3, 5])).all() - assert (nn.inputs.b == np.array([10, 20])).all() - assert nn.state.splitter == state_splitter - assert nn.state.splitter_rpn == state_rpn - assert nn.state.splitter_final == state_splitter - assert nn.state.splitter_rpn_final == state_rpn + assert (nn.a == np.array([3, 5])).all() + assert (nn.b == np.array([10, 20])).all() + assert state.splitter == state_splitter + assert state.splitter_rpn == state_rpn + assert state.splitter_final == state_splitter + assert state.splitter_rpn_final == state_rpn - with Submitter(plugin=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - results = nn.result() - for i, res in enumerate(expected): - assert results[i].output.out == res[1] - # checking the return_inputs option, either return_inputs is True or "val", - # it should give 
values of inputs that corresponds to the specific element - results_verb = nn.result(return_inputs=True) - results_verb_val = nn.result(return_inputs="val") for i, res in enumerate(expected): - assert (results_verb[i][0], results_verb[i][1].output.out) == res - assert (results_verb_val[i][0], results_verb_val[i][1].output.out) == res - - # checking the return_inputs option return_inputs="ind" - # it should give indices of inputs (instead of values) for each element - results_verb_ind = nn.result(return_inputs="ind") - for i, res in enumerate(expected_ind): - assert (results_verb_ind[i][0], results_verb_ind[i][1].output.out) == res - - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert results.outputs.out[i] == res[1] def test_task_state_3(plugin, tmp_path): """task with the simplest splitter, the input is an empty list""" - nn = fun_addtwo(name="NA").split(splitter="a", a=[]) - nn.cache_dir = tmp_path + nn = FunAddTwo().split("a", a=[]) + state = get_state(nn) - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] - assert nn.inputs.a == [] + assert state.splitter == "NA.a" + assert state.splitter_rpn == ["NA.a"] + assert nn.a == [] - with Submitter(plugin=plugin) as sub: - sub(nn) + with Submitter(worker="debug", cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - results = nn.result() + expected = [] for i, res in enumerate(expected): - assert results[i].output.out == res[1] - # checking the output_dir - assert nn.output_dir == [] + assert results.outputs.out[i] == res[1] @pytest.mark.parametrize("input_type", ["list", "array"]) @@ -936,196 +903,147 @@ def test_task_state_4(plugin, input_type, tmp_path): lst_in = [[2, 3, 4], [1, 2, 3]] if input_type == "array": lst_in = np.array(lst_in, dtype=int) - nn = moment(name="NA", n=3).split(splitter="lst", lst=lst_in) - nn.cache_dir = tmp_path + nn = Moment(n=3).split("lst", lst=lst_in) + state = get_state(nn) - assert np.allclose(nn.inputs.n, 3) - assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) - assert nn.state.splitter == "NA.lst" + assert np.allclose(nn.n, 3) + assert np.allclose(nn.lst, [[2, 3, 4], [1, 2, 3]]) + assert state.splitter == "NA.lst" - with Submitter(plugin=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking that split is done across dim 0 - el_0 = nn.state.states_val[0]["NA.lst"] + el_0 = state.states_val[0]["NA.lst"] if input_type == "list": assert el_0 == [2, 3, 4] elif input_type == "array": assert el_0 == [2, 3, 4] # checking the results - results = nn.result() + for i, expected in enumerate([33, 12]): - assert results[i].output.out == expected - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert results.outputs.out[i] == expected def test_task_state_4a(plugin, tmp_path): """task with a tuple as an input, and a simple splitter""" - nn = moment(name="NA", n=3).split(splitter="lst", lst=[(2, 3, 4), (1, 2, 3)]) - nn.cache_dir = tmp_path + nn = Moment(n=3).split("lst", lst=[(2, 3, 4), (1, 2, 3)]) + state = get_state(nn) - assert np.allclose(nn.inputs.n, 3) - assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) - assert nn.state.splitter == "NA.lst" + assert np.allclose(nn.n, 3) + assert np.allclose(nn.lst, [[2, 3, 4], [1, 2, 3]]) + assert 
state.splitter == "NA.lst" - with Submitter(plugin=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - results = nn.result() + for i, expected in enumerate([33, 12]): - assert results[i].output.out == expected - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert results.outputs.out[i] == expected def test_task_state_5(plugin, tmp_path): """task with a list as an input, and the variable is part of the scalar splitter""" - nn = moment(name="NA").split( - splitter=("n", "lst"), n=[1, 3], lst=[[2, 3, 4], [1, 2, 3]] - ) - nn.cache_dir = tmp_path + nn = Moment().split(("n", "lst"), n=[1, 3], lst=[[2, 3, 4], [1, 2, 3]]) + state = get_state(nn) - assert np.allclose(nn.inputs.n, [1, 3]) - assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) - assert nn.state.splitter == ("NA.n", "NA.lst") + assert np.allclose(nn.n, [1, 3]) + assert np.allclose(nn.lst, [[2, 3, 4], [1, 2, 3]]) + assert state.splitter == ("NA.n", "NA.lst") - with Submitter(plugin=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - results = nn.result() + for i, expected in enumerate([3, 12]): - assert results[i].output.out == expected - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert results.outputs.out[i] == expected def test_task_state_5_exception(plugin, tmp_path): """task with a list as an input, and the variable is part of the scalar splitter the shapes are not matching, so exception should be raised """ - nn = moment(name="NA").split( - splitter=("n", "lst"), n=[1, 3, 3], lst=[[2, 3, 4], [1, 2, 3]] - ) - nn.cache_dir = tmp_path + nn = Moment().split(("n", "lst"), n=[1, 3, 3], lst=[[2, 3, 4], [1, 2, 3]]) - assert np.allclose(nn.inputs.n, [1, 3, 3]) - assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) - assert nn.state.splitter == ("NA.n", "NA.lst") + assert np.allclose(nn.n, [1, 3, 3]) + assert np.allclose(nn.lst, [[2, 3, 4], [1, 2, 3]]) with pytest.raises(Exception) as excinfo: - with Submitter(plugin=plugin) as sub: - sub(nn) + get_state(nn) + assert "shape" in str(excinfo.value) def test_task_state_6(plugin, tmp_path): """ask with a list as an input, and the variable is part of the outer splitter""" - nn = moment(name="NA").split( - splitter=["n", "lst"], n=[1, 3], lst=[[2, 3, 4], [1, 2, 3]] - ) - nn.cache_dir = tmp_path + nn = Moment().split(["n", "lst"], n=[1, 3], lst=[[2, 3, 4], [1, 2, 3]]) + state = get_state(nn) - assert np.allclose(nn.inputs.n, [1, 3]) - assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) - assert nn.state.splitter == ["NA.n", "NA.lst"] + assert np.allclose(nn.n, [1, 3]) + assert np.allclose(nn.lst, [[2, 3, 4], [1, 2, 3]]) + assert state.splitter == ["NA.n", "NA.lst"] - with Submitter(plugin=plugin) as sub: - sub(nn) + with Submitter(worker="debug", cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - results = nn.result() - for i, expected in enumerate([3, 2, 33, 12]): - assert results[i].output.out == expected - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert results.outputs.out == [3.0, 2.0, 33.0, 12.0] def 
test_task_state_6a(plugin, tmp_path): """ask with a tuple as an input, and the variable is part of the outer splitter""" - nn = moment(name="NA").split( - splitter=["n", "lst"], n=[1, 3], lst=[(2, 3, 4), (1, 2, 3)] - ) - nn.cache_dir = tmp_path + nn = Moment().split(["n", "lst"], n=[1, 3], lst=[(2, 3, 4), (1, 2, 3)]) + state = get_state(nn) - assert np.allclose(nn.inputs.n, [1, 3]) - assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) - assert nn.state.splitter == ["NA.n", "NA.lst"] + assert np.allclose(nn.n, [1, 3]) + assert np.allclose(nn.lst, [[2, 3, 4], [1, 2, 3]]) + assert state.splitter == ["NA.n", "NA.lst"] - with Submitter(plugin=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - results = nn.result() - for i, expected in enumerate([3, 2, 33, 12]): - assert results[i].output.out == expected - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert results.outputs.out == [3.0, 2.0, 33.0, 12.0] @pytest.mark.flaky(reruns=2) # when dask def test_task_state_comb_1(plugin_dask_opt, tmp_path): """task with the simplest splitter and combiner""" - nn = fun_addtwo(name="NA").split(a=[3, 5], splitter="a").combine(combiner="a") - nn.cache_dir = tmp_path + nn = FunAddTwo().split(a=[3, 5]).combine(combiner="a") + state = get_state(nn) - assert (nn.inputs.a == np.array([3, 5])).all() + assert (nn.a == np.array([3, 5])).all() - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] - assert nn.state.combiner == ["NA.a"] - assert nn.state.splitter_final is None - assert nn.state.splitter_rpn_final == [] + assert state.splitter == ["NA.a"] + assert state.splitter_rpn == ["NA.a"] + assert state.combiner == ["NA.a"] + assert state.splitter_final is None + assert state.splitter_rpn_final == [] - with Submitter(plugin=plugin_dask_opt) as sub: - sub(nn) + with Submitter(worker="debug", cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) - assert nn.state.states_ind == [{"NA.a": 0}, {"NA.a": 1}] - assert nn.state.states_val == [{"NA.a": 3}, {"NA.a": 5}] + assert state.states_ind == [{"NA.a": 0}, {"NA.a": 1}] + assert state.states_val == [{"NA.a": 3}, {"NA.a": 5}] # checking the results - results = nn.result() - # fully combined (no nested list) - combined_results = [res.output.out for res in results] - assert combined_results == [5, 7] - expected = [({"NA.a": 3}, 5), ({"NA.a": 5}, 7)] - expected_ind = [({"NA.a": 0}, 5), ({"NA.a": 1}, 7)] - # checking the return_inputs option, either return_inputs is True or "val", - # it should give values of inputs that corresponds to the specific element - results_verb = nn.result(return_inputs=True) - results_verb_val = nn.result(return_inputs="val") - for i, res in enumerate(expected): - assert (results_verb[i][0], results_verb[i][1].output.out) == res - assert (results_verb_val[i][0], results_verb_val[i][1].output.out) == res - # checking the return_inputs option return_inputs="ind" - # it should give indices of inputs (instead of values) for each element - results_verb_ind = nn.result(return_inputs="ind") - for i, res in enumerate(expected_ind): - assert (results_verb_ind[i][0], results_verb_ind[i][1].output.out) == res - - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + # fully combined (no nested list) + assert 
results.outputs.out == [5, 7] @pytest.mark.parametrize( "splitter, combiner, state_splitter, state_rpn, state_combiner, state_combiner_all, " - "state_splitter_final, state_rpn_final, expected, expected_val", + "state_splitter_final, state_rpn_final, expected", # , expected_val", [ ( ("a", "b"), @@ -1137,7 +1055,7 @@ def test_task_state_comb_1(plugin_dask_opt, tmp_path): None, [], [13, 25], - [({"NA.a": 3, "NA.b": 10}, 13), ({"NA.a": 5, "NA.b": 20}, 25)], + # [({"NA.a": 3, "NA.b": 10}, 13), ({"NA.a": 5, "NA.b": 20}, 25)], ), ( ("a", "b"), @@ -1149,7 +1067,7 @@ def test_task_state_comb_1(plugin_dask_opt, tmp_path): None, [], [13, 25], - [({"NA.a": 3, "NA.b": 10}, 13), ({"NA.a": 5, "NA.b": 20}, 25)], + # [({"NA.a": 3, "NA.b": 10}, 13), ({"NA.a": 5, "NA.b": 20}, 25)], ), ( ["a", "b"], @@ -1161,10 +1079,10 @@ def test_task_state_comb_1(plugin_dask_opt, tmp_path): "NA.b", ["NA.b"], [[13, 15], [23, 25]], - [ - [({"NA.a": 3, "NA.b": 10}, 13), ({"NA.a": 5, "NA.b": 10}, 15)], - [({"NA.a": 3, "NA.b": 20}, 23), ({"NA.a": 5, "NA.b": 20}, 25)], - ], + # [ + # [({"NA.a": 3, "NA.b": 10}, 13), ({"NA.a": 5, "NA.b": 10}, 15)], + # [({"NA.a": 3, "NA.b": 20}, 23), ({"NA.a": 5, "NA.b": 20}, 25)], + # ], ), ( ["a", "b"], @@ -1176,10 +1094,10 @@ def test_task_state_comb_1(plugin_dask_opt, tmp_path): "NA.a", ["NA.a"], [[13, 23], [15, 25]], - [ - [({"NA.a": 3, "NA.b": 10}, 13), ({"NA.a": 3, "NA.b": 20}, 23)], - [({"NA.a": 5, "NA.b": 10}, 15), ({"NA.a": 5, "NA.b": 20}, 25)], - ], + # [ + # [({"NA.a": 3, "NA.b": 10}, 13), ({"NA.a": 3, "NA.b": 20}, 23)], + # [({"NA.a": 5, "NA.b": 10}, 15), ({"NA.a": 5, "NA.b": 20}, 25)], + # ], ), ( ["a", "b"], @@ -1191,12 +1109,12 @@ def test_task_state_comb_1(plugin_dask_opt, tmp_path): None, [], [13, 23, 15, 25], - [ - ({"NA.a": 3, "NA.b": 10}, 13), - ({"NA.a": 3, "NA.b": 20}, 23), - ({"NA.a": 5, "NA.b": 10}, 15), - ({"NA.a": 5, "NA.b": 20}, 25), - ], + # [ + # ({"NA.a": 3, "NA.b": 10}, 13), + # ({"NA.a": 3, "NA.b": 20}, 23), + # ({"NA.a": 5, "NA.b": 10}, 15), + # ({"NA.a": 5, "NA.b": 20}, 25), + # ], ), ], ) @@ -1211,160 +1129,121 @@ def test_task_state_comb_2( state_splitter_final, state_rpn_final, expected, - expected_val, + # expected_val, tmp_path, ): """Tasks with scalar and outer splitters and partial or full combiners""" - nn = ( - fun_addvar(name="NA") - .split(a=[3, 5], b=[10, 20], splitter=splitter) - .combine(combiner=combiner) - ) - nn.cache_dir = tmp_path + nn = FunAddVar().split(splitter, a=[3, 5], b=[10, 20]).combine(combiner=combiner) + state = get_state(nn) - assert (nn.inputs.a == np.array([3, 5])).all() + assert (nn.a == np.array([3, 5])).all() - assert nn.state.splitter == state_splitter - assert nn.state.splitter_rpn == state_rpn - assert nn.state.combiner == state_combiner + assert state.splitter == state_splitter + assert state.splitter_rpn == state_rpn + assert state.combiner == state_combiner - with Submitter(plugin=plugin) as sub: - sub(nn) + with Submitter(worker="debug", cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) - assert nn.state.splitter_final == state_splitter_final - assert nn.state.splitter_rpn_final == state_rpn_final - assert set(nn.state.current_combiner_all) == set(state_combiner_all) + assert state.splitter_final == state_splitter_final + assert state.splitter_rpn_final == state_rpn_final + assert set(state.current_combiner_all) == set(state_combiner_all) # checking the results - results = nn.result() + # checking the return_inputs option, either return_inputs is 
True or "val", # it should give values of inputs that corresponds to the specific element - results_verb = nn.result(return_inputs=True) - - if nn.state.splitter_rpn_final: - for i, res in enumerate(expected): - assert [res.output.out for res in results[i]] == res - # results_verb - for i, res_l in enumerate(expected_val): - for j, res in enumerate(res_l): - assert (results_verb[i][j][0], results_verb[i][j][1].output.out) == res - # if the combiner is full expected is "a flat list" - else: - assert [res.output.out for res in results] == expected - for i, res in enumerate(expected_val): - assert (results_verb[i][0], results_verb[i][1].output.out) == res + # results_verb = nn.result(return_inputs=True) - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert results.outputs.out == expected def test_task_state_comb_singl_1(plugin, tmp_path): """Tasks with two inputs; one input is a single value, the other is in the splitter and combiner """ - nn = fun_addvar(name="NA").split(splitter="a", a=[3, 5], b=10).combine(combiner="a") - nn.cache_dir = tmp_path - - assert nn.inputs.a == [3, 5] - assert nn.inputs.b == 10 - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] - assert nn.state.combiner == ["NA.a"] - assert nn.state.splitter_final is None - assert nn.state.splitter_rpn_final == [] - - with Submitter(plugin=plugin) as sub: - sub(nn) + nn = FunAddVar(b=10).split("a", a=[3, 5]).combine(combiner="a") + state = get_state(nn) - # checking the results - expected = ({}, [13, 15]) - results = nn.result() - # full combiner, no nested list - combined_results = [res.output.out for res in results] - assert combined_results == expected[1] - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert nn.a == [3, 5] + assert nn.b == 10 + assert state.splitter == "NA.a" + assert state.splitter_rpn == ["NA.a"] + assert state.combiner == ["NA.a"] + assert state.splitter_final is None + assert state.splitter_rpn_final == [] + + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) + + assert results.outputs.out == [13, 15] def test_task_state_comb_3(plugin, tmp_path): """task with the simplest splitter, the input is an empty list""" - nn = fun_addtwo(name="NA").split(splitter="a", a=[]).combine(combiner=["a"]) - nn.cache_dir = tmp_path + nn = FunAddTwo().split("a", a=[]).combine(combiner=["a"]) + state = get_state(nn) - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] - assert nn.inputs.a == [] + assert state.splitter == "NA.a" + assert state.splitter_rpn == ["NA.a"] + assert nn.a == [] - with Submitter(plugin=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - results = nn.result() + expected = [] for i, res in enumerate(expected): - assert results[i].output.out == res[1] - # checking the output_dir - assert nn.output_dir == [] + assert results.outputs.out[i] == res[1] -def test_task_state_comb_order(): +def test_task_state_comb_order(tmp_path): """tasks with an outer splitter and various combiner; showing the order of results """ # single combiner "a" - will create two lists, first one for b=3, second for b=5 - nn_a = ( - fun_addvar(name="NA") - .split(splitter=["a", "b"], a=[10, 20], b=[3, 5]) - 
.combine(combiner="a") - ) - assert nn_a.state.combiner == ["NA.a"] + nn_a = FunAddVar().split(["a", "b"], a=[10, 20], b=[3, 5]).combine(combiner="a") + state_a = get_state(nn_a) + assert state_a.combiner == ["NA.a"] - results_a = nn_a() - combined_results_a = [[res.output.out for res in res_l] for res_l in results_a] - assert combined_results_a == [[13, 23], [15, 25]] + outputs = nn_a(cache_dir=tmp_path / "cache") + # combined_results_a = [[res.output.out for res in res_l] for res_l in results_a] + assert outputs.out == [[13, 23], [15, 25]] # single combiner "b" - will create two lists, first one for a=10, second for a=20 - nn_b = ( - fun_addvar(name="NA") - .split(splitter=["a", "b"], a=[10, 20], b=[3, 5]) - .combine(combiner="b") - ) - assert nn_b.state.combiner == ["NA.b"] + nn_b = FunAddVar().split(["a", "b"], a=[10, 20], b=[3, 5]).combine(combiner="b") + state_b = get_state(nn_b) + assert state_b.combiner == ["NA.b"] - results_b = nn_b() - combined_results_b = [[res.output.out for res in res_l] for res_l in results_b] - assert combined_results_b == [[13, 15], [23, 25]] + outputs_b = nn_b(cache_dir=tmp_path / "cache_b") + # combined_results_b = [[res.output.out for res in res_l] for res_l in results_b] + assert outputs_b.out == [[13, 15], [23, 25]] # combiner with both fields ["a", "b"] - will create one list nn_ab = ( - fun_addvar(name="NA") - .split(splitter=["a", "b"], a=[10, 20], b=[3, 5]) - .combine(combiner=["a", "b"]) + FunAddVar().split(["a", "b"], a=[10, 20], b=[3, 5]).combine(combiner=["a", "b"]) ) - assert nn_ab.state.combiner == ["NA.a", "NA.b"] + state_ab = get_state(nn_ab) + assert state_ab.combiner == ["NA.a", "NA.b"] - results_ab = nn_ab() - # full combiner, no nested list - combined_results_ab = [res.output.out for res in results_ab] - assert combined_results_ab == [13, 15, 23, 25] + outputs_ab = nn_ab(cache_dir=tmp_path / "cache_ab") + assert outputs_ab.out == [13, 15, 23, 25] # combiner with both fields ["b", "a"] - will create the same list as nn_ab # no difference in the order for setting combiner nn_ba = ( - fun_addvar(name="NA") - .split(splitter=["a", "b"], a=[10, 20], b=[3, 5]) - .combine(combiner=["b", "a"]) + FunAddVar().split(["a", "b"], a=[10, 20], b=[3, 5]).combine(combiner=["b", "a"]) ) - assert nn_ba.state.combiner == ["NA.b", "NA.a"] + state_ba = get_state(nn_ba) + assert state_ba.combiner == ["NA.b", "NA.a"] - results_ba = nn_ba() - combined_results_ba = [res.output.out for res in results_ba] - assert combined_results_ba == [13, 15, 23, 25] + outputs_ba = nn_ba(cache_dir=tmp_path / "cache_ba") + assert outputs_ba.out == [13, 15, 23, 25] # Testing with container dimensions for the input @@ -1372,31 +1251,23 @@ def test_task_state_comb_order(): def test_task_state_contdim_1(tmp_path): """task with a spliter and container dimension for one of the value""" - task_4var = op_4var( - name="op_4var", + task_4var = Op4Var( a="a1", - cache_dir=tmp_path, - ) - task_4var.split( + ).split( ("b", ["c", "d"]), b=[["b1", "b2"], ["b3", "b4"]], c=["c1", "c2"], d=["d1", "d2"], cont_dim={"b": 2}, ) - task_4var() - res = task_4var.result() - assert len(res) == 4 - assert res[3].output.out == "a1 b4 c2 d2" + outputs = task_4var(cache_dir=tmp_path) + assert len(outputs.out) == 4 + assert outputs.out[3] == "a1 b4 c2 d2" def test_task_state_contdim_2(tmp_path): """task with a splitter and container dimension for one of the value""" - task_4var = op_4var( - name="op_4var", - cache_dir=tmp_path, - ) - task_4var.split( + task_4var = Op4Var().split( ["a", ("b", ["c", "d"])], 
cont_dim={"b": 2}, a=["a1", "a2"], @@ -1404,50 +1275,46 @@ def test_task_state_contdim_2(tmp_path): c=["c1", "c2"], d=["d1", "d2"], ) - task_4var() - res = task_4var.result() - assert len(res) == 8 - assert res[7].output.out == "a2 b4 c2 d2" + outputs = task_4var(cache_dir=tmp_path) + assert len(outputs.out) == 8 + assert outputs.out[7] == "a2 b4 c2 d2" def test_task_state_comb_contdim_1(tmp_path): """task with a splitter-combiner, and container dimension for one of the value""" - task_4var = op_4var( - name="op_4var", - a="a1", - cache_dir=tmp_path, + task_4var = ( + Op4Var(a="a1") + .split( + ("b", ["c", "d"]), + cont_dim={"b": 2}, + b=[["b1", "b2"], ["b3", "b4"]], + c=["c1", "c2"], + d=["d1", "d2"], + ) + .combine("b") ) - task_4var.split( - ("b", ["c", "d"]), - cont_dim={"b": 2}, - b=[["b1", "b2"], ["b3", "b4"]], - c=["c1", "c2"], - d=["d1", "d2"], - ).combine("b") - task_4var() - res = task_4var.result() - assert len(res) == 4 - assert res[3].output.out == "a1 b4 c2 d2" + outputs = task_4var(cache_dir=tmp_path) + assert len(outputs.out) == 4 + assert outputs.out[3] == "a1 b4 c2 d2" def test_task_state_comb_contdim_2(tmp_path): """task with a splitter-combiner, and container dimension for one of the value""" - task_4var = op_4var( - name="op_4var", - cache_dir=tmp_path, + task_4var = ( + Op4Var() + .split( + ["a", ("b", ["c", "d"])], + a=["a1", "a2"], + b=[["b1", "b2"], ["b3", "b4"]], + c=["c1", "c2"], + d=["d1", "d2"], + cont_dim={"b": 2}, + ) + .combine("a") ) - task_4var.split( - ["a", ("b", ["c", "d"])], - a=["a1", "a2"], - b=[["b1", "b2"], ["b3", "b4"]], - c=["c1", "c2"], - d=["d1", "d2"], - cont_dim={"b": 2}, - ).combine("a") - task_4var() - res = task_4var.result() - assert len(res) == 4 - assert res[3][1].output.out == "a2 b4 c2 d2" + outputs = task_4var(cache_dir=tmp_path) + assert len(outputs.out) == 4 + assert outputs.out[3][1] == "a2 b4 c2 d2" # Testing caching for tasks with states @@ -1458,19 +1325,21 @@ def test_task_state_cachedir(plugin_dask_opt, tmp_path): """task with a state and provided cache_dir using pytest tmp_path""" cache_dir = tmp_path / "test_task_nostate" cache_dir.mkdir() - nn = fun_addtwo(name="NA", cache_dir=cache_dir).split(splitter="a", a=[3, 5]) + nn = FunAddTwo().split("a", a=[3, 5]) + state = get_state(nn) - assert nn.state.splitter == "NA.a" - assert (nn.inputs.a == np.array([3, 5])).all() + assert state.splitter == "NA.a" + assert (nn.a == np.array([3, 5])).all() - with Submitter(plugin=plugin_dask_opt) as sub: - sub(nn) + with Submitter(worker=plugin_dask_opt, cache_dir=cache_dir) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - results = nn.result() + expected = [({"NA.a": 3}, 5), ({"NA.a": 5}, 7)] for i, res in enumerate(expected): - assert results[i].output.out == res[1] + assert results.outputs.out[i] == res[1] def test_task_state_cachelocations(plugin, tmp_path): @@ -1483,24 +1352,25 @@ def test_task_state_cachelocations(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir).split(splitter="a", a=[3, 5]) - with Submitter(plugin=plugin) as sub: + nn = FunAddTwo(a=3).split("a", a=[3, 5]) + with Submitter(worker=plugin, cache_dir=cache_dir) as sub: sub(nn) - nn2 = fun_addtwo( - name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir - ).split(splitter="a", a=[3, 5]) - with Submitter(plugin=plugin) as sub: - sub(nn2) + nn2 = FunAddTwo(a=3).split("a", a=[3, 5]) + with Submitter( + 
worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir + ) as sub: + results2 = sub(nn2) + assert not results2.errored, "\n".join(results2.errors["error message"]) # checking the results - results2 = nn2.result() expected = [({"NA.a": 3}, 5), ({"NA.a": 5}, 7)] for i, res in enumerate(expected): - assert results2[i].output.out == res[1] + assert results2.outputs.out[i] == res[1] - assert all([dir.exists() for dir in nn.output_dir]) - assert not any([dir.exists() for dir in nn2.output_dir]) + # Would ideally check for all nodes of the workflows + assert num_python_cache_dirs(cache_dir) == 2 + assert not num_python_cache_dirs(cache_dir2) def test_task_state_cachelocations_forcererun(plugin, tmp_path): @@ -1514,25 +1384,25 @@ def test_task_state_cachelocations_forcererun(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir).split(splitter="a", a=[3, 5]) - with Submitter(plugin=plugin) as sub: + nn = FunAddTwo(a=3).split("a", a=[3, 5]) + with Submitter(worker=plugin, cache_dir=cache_dir) as sub: sub(nn) - nn2 = fun_addtwo( - name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir - ).split(splitter="a", a=[3, 5]) - with Submitter(plugin=plugin) as sub: - sub(nn2, rerun=True) + nn2 = FunAddTwo(a=3).split("a", a=[3, 5]) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir + ) as sub: + results2 = sub(nn2, rerun=True) # checking the results - results2 = nn2.result() + expected = [({"NA.a": 3}, 5), ({"NA.a": 5}, 7)] for i, res in enumerate(expected): - assert results2[i].output.out == res[1] + assert results2.outputs.out[i] == res[1] # both workflows should be run - assert all([dir.exists() for dir in nn.output_dir]) - assert all([dir.exists() for dir in nn2.output_dir]) + assert num_python_cache_dirs(cache_dir) == 2 + assert num_python_cache_dirs(cache_dir2) == 2 def test_task_state_cachelocations_updated(plugin, tmp_path): @@ -1549,25 +1419,26 @@ def test_task_state_cachelocations_updated(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = fun_addtwo(name="NA", cache_dir=cache_dir).split(splitter="a", a=[3, 5]) - with Submitter(plugin=plugin) as sub: + nn = FunAddTwo().split("a", a=[3, 5]) + with Submitter(worker=plugin, cache_dir=cache_dir) as sub: sub(nn) - nn2 = fun_addtwo(name="NA", cache_dir=cache_dir2, cache_locations=cache_dir).split( - splitter="a", a=[3, 5] - ) - with Submitter(plugin=plugin) as sub: - sub(nn2, cache_locations=cache_dir1) + nn2 = FunAddTwo().split("a", a=[3, 5]) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(nn2) + assert not results2.errored, "\n".join(results2.errors["error message"]) # checking the results - results2 = nn2.result() + expected = [({"NA.a": 3}, 5), ({"NA.a": 5}, 7)] for i, res in enumerate(expected): - assert results2[i].output.out == res[1] + assert results2.outputs.out[i] == res[1] # both workflows should be run - assert all([dir.exists() for dir in nn.output_dir]) - assert all([dir.exists() for dir in nn2.output_dir]) + assert num_python_cache_dirs(cache_dir) == 2 + assert num_python_cache_dirs(cache_dir2) == 2 def test_task_files_cachelocations(plugin_dask_opt, tmp_path): @@ -1587,23 +1458,24 @@ def test_task_files_cachelocations(plugin_dask_opt, tmp_path): input2 = input_dir / "input2.txt" input2.write_text("test") - nn = fun_file(name="NA", filename=input1, cache_dir=cache_dir) - with Submitter(plugin=plugin_dask_opt)
as sub: - sub(nn) + nn = FunFile(filename=input1) + with Submitter(worker=plugin_dask_opt, cache_dir=cache_dir) as sub: + results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) - nn2 = fun_file( - name="NA", filename=input2, cache_dir=cache_dir2, cache_locations=cache_dir - ) - with Submitter(plugin=plugin_dask_opt) as sub: - sub(nn2) + nn2 = FunFile(filename=input2) + with Submitter( + worker=plugin_dask_opt, cache_dir=cache_dir2, cache_locations=cache_dir + ) as sub: + results2 = sub(nn2) + assert not results2.errored, "\n".join(results2.errors["error message"]) # checking the results - results2 = nn2.result() - assert results2.output.out == "test" + + assert results2.outputs.out == "test" # checking if the second task didn't run the interface again - assert nn.output_dir.exists() - assert not nn2.output_dir.exists() + assert results.output_dir == results2.output_dir class OverriddenContentsFile(File): @@ -1626,10 +1498,10 @@ def byte_chunks(self, **kwargs) -> ty.Generator[ty.Tuple[str, bytes], None, None yield from super().byte_chunks(**kwargs) @property - def contents(self): + def raw_contents(self): if self._contents is not None: return self._contents - return super().contents + return super().raw_contents def test_task_files_persistentcache(tmp_path): @@ -1643,23 +1515,16 @@ def test_task_files_persistentcache(tmp_path): cache_dir.mkdir() test_file = OverriddenContentsFile(test_file_path) - @pydra.mark.task + @python.define def read_contents(x: OverriddenContentsFile) -> bytes: - return x.contents + return x.raw_contents - assert ( - read_contents(x=test_file, cache_dir=cache_dir)(plugin="serial").output.out - == b"foo" - ) + assert read_contents(x=test_file)(cache_dir=cache_dir).out == b"foo" test_file._contents = b"bar" # should return result from the first run using the persistent cache - assert ( - read_contents(x=test_file, cache_dir=cache_dir)(plugin="serial").output.out - == b"foo" - ) + assert read_contents(x=test_file)(cache_dir=cache_dir).out == b"foo" time.sleep(2) # Windows has a 2-second resolution for mtime test_file_path.touch() # update the mtime to invalidate the persistent cache value assert ( - read_contents(x=test_file, cache_dir=cache_dir)(plugin="serial").output.out - == b"bar" + read_contents(x=test_file)(cache_dir=cache_dir).out == b"bar" ) # returns the overridden value diff --git a/pydra/engine/tests/test_numpy_examples.py b/pydra/engine/tests/test_numpy_examples.py index defdad7a2b..e9acc379cd 100644 --- a/pydra/engine/tests/test_numpy_examples.py +++ b/pydra/engine/tests/test_numpy_examples.py @@ -7,55 +7,50 @@ from ..submitter import Submitter -from ..core import Workflow -from ...mark import task, annotate -from .utils import identity -from ...utils.hash import hash_function, Cache +from pydra.design import python, workflow +from .utils import Identity +from pydra.utils.hash import hash_function if importlib.util.find_spec("numpy") is None: pytest.skip("can't find numpy library", allow_module_level=True) -@task -@annotate({"return": {"b": ty.Any}}) -def arrayout(val): +@python.define(outputs=["b"]) +def ArrayOut(val): return np.array([val, val]) def test_multiout(tmpdir): """testing a simple function that returns a numpy array""" - wf = Workflow("wf", input_spec=["val"], val=2) - wf.add(arrayout(name="mo", val=wf.lzin.val)) - wf.set_output([("array", wf.mo.lzout.b)]) - wf.cache_dir = tmpdir + @workflow.define(outputs=["array"]) + def Workflow(val): + mo = workflow.add(ArrayOut(val=val)) + return mo.b - with
Submitter(plugin="cf", n_procs=2) as sub: - sub(runnable=wf) + wf = Workflow(val=2) - results = wf.result(return_inputs=True) + with Submitter(worker="cf", cache_dir=tmpdir, n_procs=2) as sub: + results = sub(wf) - assert results[0] == {"wf.val": 2} - assert np.array_equal(results[1].output.array, np.array([2, 2])) + assert np.array_equal(results.outputs.array, np.array([2, 2])) def test_multiout_st(tmpdir): """testing a simple function that returns a numpy array, adding splitter""" - wf = Workflow("wf", input_spec=["val"], val=[0, 1, 2]) - wf.add(arrayout(name="mo")) - wf.mo.split("val", val=wf.lzin.val).combine("val") - wf.set_output([("array", wf.mo.lzout.b)]) - wf.cache_dir = tmpdir + @workflow.define(outputs=["array"]) + def Workflow(values): + mo = workflow.add(ArrayOut().split(val=values).combine("val")) + return mo.b - with Submitter(plugin="cf", n_procs=2) as sub: - sub(runnable=wf) + wf = Workflow(values=[0, 1, 2]) - results = wf.result(return_inputs=True) + with Submitter(worker="cf", cache_dir=tmpdir, n_procs=2) as sub: + results = sub(wf) - assert results[0] == {"wf.val": [0, 1, 2]} for el in range(3): - assert np.array_equal(results[1].output.array[el], np.array([el, el])) + assert np.array_equal(results.outputs.array[el], np.array([el, el])) def test_numpy_hash_1(): @@ -83,20 +78,18 @@ def test_numpy_hash_3(): def test_task_numpyinput_1(tmp_path: Path): """task with numeric numpy array as an input""" - nn = identity(name="NA") - nn.cache_dir = tmp_path - nn.split(x=[np.array([1, 2]), np.array([3, 4])]) + nn = Identity().split(x=[np.array([1, 2]), np.array([3, 4])]) # checking the results - results = nn() - assert (results[0].output.out == np.array([1, 2])).all() - assert (results[1].output.out == np.array([3, 4])).all() + outputs = nn(cache_dir=tmp_path) + assert (np.array(outputs.out) == np.array([[1, 2], [3, 4]])).all() def test_task_numpyinput_2(tmp_path: Path): """task with numpy array of type object as an input""" - nn = identity(name="NA") - nn.cache_dir = tmp_path - nn.split(x=[np.array(["VAL1"], dtype=object), np.array(["VAL2"], dtype=object)]) + nn = Identity().split( + x=[np.array(["VAL1"], dtype=object), np.array(["VAL2"], dtype=object)] + ) # checking the results - results = nn() - assert (results[0].output.out == np.array(["VAL1"], dtype=object)).all() + outputs = nn(cache_dir=tmp_path) + assert outputs.out[0] == np.array(["VAL1"], dtype=object) + assert outputs.out[1] == np.array(["VAL2"], dtype=object) diff --git a/pydra/engine/tests/test_profiles.py b/pydra/engine/tests/test_profiles.py index f84f8d19f4..b8dbcaabe5 100644 --- a/pydra/engine/tests/test_profiles.py +++ b/pydra/engine/tests/test_profiles.py @@ -1,29 +1,29 @@ -from ..core import Workflow from ..helpers import load_task -from ... 
import mark +from pydra.design import python, workflow +from pydra.engine.core import Task +from pydra.engine.submitter import Submitter import numpy as np from pympler import asizeof from pytest import approx -def generate_list(l): - return np.arange(l).tolist() +def generate_list(n): + return np.arange(n).tolist() -@mark.task -def show_var(a): +@python.define +def ShowVar(a): return a def create_wf(size): - wf = Workflow(name="wf", input_spec=["x"]) - wf.split("x", x=generate_list(size)) - wf.add(show_var(name="show", a=wf.lzin.x)) - wf.set_output([("out", wf.show.lzout.out)]) - wf.state.prepare_states(wf.inputs) - wf.state.prepare_inputs() - return wf + @workflow.define + def Workflow(x): + show = workflow.add(ShowVar(a=x)) + return show.out + + return Workflow().split(x=generate_list(size)) def test_wf_memory(): @@ -31,35 +31,15 @@ def test_wf_memory(): testings if the size of workflow grows linearly """ - wf_1000 = create_wf(size=1000) - wf_1000_mem = asizeof.asizeof(wf_1000) + wf_10000 = create_wf(size=10000) + wf_10000_mem = asizeof.asizeof(wf_10000) - wf_2000 = create_wf(size=2000) - wf_2000_mem = asizeof.asizeof(wf_2000) + wf_20000 = create_wf(size=20000) + wf_20000_mem = asizeof.asizeof(wf_20000) - wf_4000 = create_wf(size=4000) - wf_4000_mem = asizeof.asizeof(wf_4000) + wf_40000 = create_wf(size=40000) + wf_40000_mem = asizeof.asizeof(wf_40000) # checking if it's linear with the size of the splitter # check print(asizeof.asized(wf_4000, detail=2).format()) in case of problems - assert wf_4000_mem / wf_2000_mem == approx(2, 0.05) - assert wf_2000_mem / wf_1000_mem == approx(2, 0.05) - - -def test_load_task_memory(): - """creating two workflow with relatively big splitter: 1000 and 4000 elements - testings if load_task for a single element returns tasks of a similar size - """ - - wf_1000 = create_wf(size=1000) - wf_1000_pkl = wf_1000.pickle_task() - wf_1000_loaded = load_task(task_pkl=wf_1000_pkl, ind=1) - wf_1000_single_mem = asizeof.asizeof(wf_1000_loaded) - - wf_4000 = create_wf(size=4000) - wf_4000_pkl = wf_4000.pickle_task() - wf_4000_loaded = load_task(task_pkl=wf_4000_pkl, ind=1) - wf_4000_single_mem = asizeof.asizeof(wf_4000_loaded) - - # checking if it doesn't change with size of the splitter - # check print(asizeof.asized(wf_4000_loaded, detail=2).format()) in case of problems - assert wf_1000_single_mem / wf_4000_single_mem == approx(1, 0.05) + assert wf_40000_mem / wf_20000_mem == approx(2, 0.05) + assert wf_20000_mem / wf_10000_mem == approx(2, 0.05) diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 4857db094f..e0d9a35add 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -1,107 +1,108 @@ -import attr import typing as ty -import os, sys -import subprocess as sp +import os +import sys import pytest from pathlib import Path import re import stat - -from ..task import ShellCommandTask -from ..submitter import Submitter -from ..core import Workflow -from ..specs import ( - ShellOutSpec, - ShellSpec, - SpecInfo, +import attrs +from pydra.engine.submitter import Submitter +from pydra.design import shell, workflow, python +from pydra.engine.specs import ( + ShellOutputs, + ShellDef, +) +from fileformats.generic import ( File, Directory, - MultiInputFile, +) +from pydra.utils.typing import ( MultiOutputFile, MultiInputObj, + StateArray, ) -from .utils import result_no_submitter, result_submitter, no_win +from .utils import run_no_submitter, run_submitter, no_win, get_output_names if 
sys.platform.startswith("win"): pytest.skip("SLURM not available in windows", allow_module_level=True) @pytest.mark.flaky(reruns=2) # when dask -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_1(plugin_dask_opt, results_function, tmp_path): """simple command, no arguments""" cmd = ["pwd"] - shelly = ShellCommandTask(name="shelly", executable=cmd, cache_dir=tmp_path) + shelly = shell.define(cmd)() assert shelly.cmdline == " ".join(cmd) - res = results_function(shelly, plugin=plugin_dask_opt) - assert Path(res.output.stdout.rstrip()) == shelly.output_dir - assert res.output.return_code == 0 - assert res.output.stderr == "" + outputs = results_function(shelly, plugin=plugin_dask_opt, cache_dir=tmp_path) + assert Path(outputs.stdout.rstrip()).parent == tmp_path + assert outputs.return_code == 0 + assert outputs.stderr == "" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_1_strip(plugin, results_function, tmp_path): """simple command, no arguments strip option to remove \n at the end os stdout """ cmd = ["pwd"] - shelly = ShellCommandTask(name="shelly", executable=cmd, strip=True) - shelly.cache_dir = tmp_path + shelly = shell.define(cmd)() + assert shelly.cmdline == " ".join(cmd) - res = results_function(shelly, plugin) - assert Path(res.output.stdout) == Path(shelly.output_dir) - assert res.output.return_code == 0 - assert res.output.stderr == "" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert Path(outputs.stdout).parent == tmp_path + assert outputs.return_code == 0 + assert outputs.stderr == "" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_2(plugin, results_function, tmp_path): """a command with arguments, cmd and args given as executable""" cmd = ["echo", "hail", "pydra"] - shelly = ShellCommandTask(name="shelly", executable=cmd) - shelly.cache_dir = tmp_path + shelly = shell.define(cmd)() + assert shelly.cmdline == " ".join(cmd) - res = results_function(shelly, plugin) - assert res.output.stdout.strip() == " ".join(cmd[1:]) - assert res.output.return_code == 0 - assert res.output.stderr == "" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout.strip() == " ".join(cmd[1:]) + assert outputs.return_code == 0 + assert outputs.stderr == "" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_2a(plugin, results_function, tmp_path): """a command with arguments, using executable and args""" cmd_exec = "echo" cmd_args = ["hail", "pydra"] # separate command into exec + args - shelly = ShellCommandTask(name="shelly", executable=cmd_exec, args=cmd_args) - shelly.cache_dir = tmp_path - assert shelly.inputs.executable == "echo" + shelly = shell.define(cmd_exec)(additional_args=cmd_args) + + assert shelly.executable == "echo" assert shelly.cmdline == "echo " + " ".join(cmd_args) - res = results_function(shelly, plugin) - assert res.output.stdout.strip() == " ".join(cmd_args) - assert res.output.return_code == 0 - assert res.output.stderr == "" + outputs = results_function(shelly, 
plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout.strip() == " ".join(cmd_args) + assert outputs.return_code == 0 + assert outputs.stderr == "" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_2b(plugin, results_function, tmp_path): """a command with arguments, using strings executable and args""" cmd_exec = "echo" - cmd_args = "pydra" + cmd_args = ["pydra"] # separate command into exec + args - shelly = ShellCommandTask(name="shelly", executable=cmd_exec, args=cmd_args) - shelly.cache_dir = tmp_path - assert shelly.inputs.executable == "echo" + shelly = shell.define(cmd_exec)(additional_args=cmd_args) + + assert shelly.executable == "echo" assert shelly.cmdline == "echo pydra" - res = results_function(shelly, plugin) - assert res.output.stdout == "pydra\n" - assert res.output.return_code == 0 - assert res.output.stderr == "" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "pydra\n" + assert outputs.return_code == 0 + assert outputs.stderr == "" # tests with State @@ -115,19 +116,18 @@ def test_shell_cmd_3(plugin_dask_opt, tmp_path): cmd = ["pwd", "whoami"] # all args given as executable - shelly = ShellCommandTask(name="shelly").split("executable", executable=cmd) - shelly.cache_dir = tmp_path + shelly = shell.define("shelly")().split(executable=cmd) # assert shelly.cmdline == ["pwd", "whoami"] - res = shelly(plugin=plugin_dask_opt) - assert Path(res[0].output.stdout.rstrip()) == shelly.output_dir[0] + outputs = shelly(plugin=plugin_dask_opt, cache_dir=tmp_path) + assert Path(outputs.stdout[0].rstrip()).parent == tmp_path if "USER" in os.environ: - assert res[1].output.stdout == f"{os.environ['USER']}\n" + assert outputs.stdout[1] == f"{os.environ['USER']}\n" else: - assert res[1].output.stdout - assert res[0].output.return_code == res[1].output.return_code == 0 - assert res[0].output.stderr == res[1].output.stderr == "" + assert outputs.stdout[1] + assert outputs.return_code[0] == outputs.return_code[1] == 0 + assert outputs.stderr[0] == outputs.stderr[1] == "" def test_shell_cmd_4(plugin, tmp_path): @@ -135,23 +135,20 @@ def test_shell_cmd_4(plugin, tmp_path): splitter=args """ cmd_exec = "echo" - cmd_args = ["nipype", "pydra"] + cmd_args = [["nipype"], ["pydra"]] # separate command into exec + args - shelly = ShellCommandTask(name="shelly", executable=cmd_exec).split( - splitter="args", args=cmd_args - ) - shelly.cache_dir = tmp_path + shelly = shell.define(cmd_exec)().split(additional_args=cmd_args) - assert shelly.inputs.executable == "echo" - assert shelly.inputs.args == ["nipype", "pydra"] + assert shelly.executable == "echo" + assert shelly.additional_args == StateArray([["nipype"], ["pydra"]]) # assert shelly.cmdline == ["echo nipype", "echo pydra"] - res = shelly(plugin=plugin) + outputs = shelly(plugin=plugin) - assert res[0].output.stdout == "nipype\n" - assert res[1].output.stdout == "pydra\n" + assert outputs.stdout[0] == "nipype\n" + assert outputs.stdout[1] == "pydra\n" - assert res[0].output.return_code == res[1].output.return_code == 0 - assert res[0].output.stderr == res[1].output.stderr == "" + assert outputs.return_code[0] == outputs.return_code[1] == 0 + assert outputs.stderr[0] == outputs.stderr[1] == "" def test_shell_cmd_5(plugin, tmp_path): @@ -159,22 +156,21 @@ def test_shell_cmd_5(plugin, tmp_path): using splitter and combiner for args """ cmd_exec = "echo" - 
cmd_args = ["nipype", "pydra"] + cmd_args = [["nipype"], ["pydra"]] # separate command into exec + args shelly = ( - ShellCommandTask(name="shelly", executable=cmd_exec) - .split(splitter="args", args=cmd_args) - .combine("args") + shell.define(cmd_exec)() + .split(additional_args=cmd_args) + .combine("additional_args") ) - shelly.cache_dir = tmp_path - assert shelly.inputs.executable == "echo" - assert shelly.inputs.args == ["nipype", "pydra"] + assert shelly.executable == "echo" + assert shelly.additional_args == StateArray([["nipype"], ["pydra"]]) # assert shelly.cmdline == ["echo nipype", "echo pydra"] - res = shelly(plugin=plugin) + outputs = shelly(plugin=plugin) - assert res[0].output.stdout == "nipype\n" - assert res[1].output.stdout == "pydra\n" + assert outputs.stdout[0] == "nipype\n" + assert outputs.stdout[1] == "pydra\n" def test_shell_cmd_6(plugin, tmp_path): @@ -182,40 +178,30 @@ def test_shell_cmd_6(plugin, tmp_path): outer splitter for executable and args """ cmd_exec = ["echo", ["echo", "-n"]] - cmd_args = ["nipype", "pydra"] + cmd_args = [["nipype"], ["pydra"]] # separate command into exec + args - shelly = ShellCommandTask(name="shelly").split( - splitter=["executable", "args"], executable=cmd_exec, args=cmd_args + shelly = shell.define("shelly")().split( + ["executable", "additional_args"], executable=cmd_exec, additional_args=cmd_args ) - shelly.cache_dir = tmp_path - - assert shelly.inputs.executable == ["echo", ["echo", "-n"]] - assert shelly.inputs.args == ["nipype", "pydra"] - # assert shelly.cmdline == [ - # "echo nipype", - # "echo pydra", - # "echo -n nipype", - # "echo -n pydra", - # ] - res = shelly(plugin=plugin) - - assert res[0].output.stdout == "nipype\n" - assert res[1].output.stdout == "pydra\n" - assert res[2].output.stdout == "nipype" - assert res[3].output.stdout == "pydra" + + assert shelly.executable == ["echo", ["echo", "-n"]] + assert shelly.additional_args == StateArray([["nipype"], ["pydra"]]) + outputs = shelly(cache_dir=tmp_path, plugin=plugin) + + assert outputs.stdout == ["nipype\n", "pydra\n", "nipype", "pydra"] assert ( - res[0].output.return_code - == res[1].output.return_code - == res[2].output.return_code - == res[3].output.return_code + outputs.return_code[0] + == outputs.return_code[1] + == outputs.return_code[2] + == outputs.return_code[3] == 0 ) assert ( - res[0].output.stderr - == res[1].output.stderr - == res[2].output.stderr - == res[3].output.stderr + outputs.stderr[0] + == outputs.stderr[1] + == outputs.stderr[2] + == outputs.stderr[3] == "" ) @@ -225,25 +211,24 @@ def test_shell_cmd_7(plugin, tmp_path): outer splitter for executable and args, and combiner=args """ cmd_exec = ["echo", ["echo", "-n"]] - cmd_args = ["nipype", "pydra"] + cmd_args = [["nipype"], ["pydra"]] # separate command into exec + args shelly = ( - ShellCommandTask(name="shelly") - .split(splitter=["executable", "args"], executable=cmd_exec, args=cmd_args) - .combine("args") + shell.define("shelly")() + .split( + ["executable", "additional_args"], + executable=cmd_exec, + additional_args=cmd_args, + ) + .combine("additional_args") ) - shelly.cache_dir = tmp_path - - assert shelly.inputs.executable == ["echo", ["echo", "-n"]] - assert shelly.inputs.args == ["nipype", "pydra"] - res = shelly(plugin=plugin) + assert shelly.executable == ["echo", ["echo", "-n"]] + assert shelly.additional_args == StateArray([["nipype"], ["pydra"]]) - assert res[0][0].output.stdout == "nipype\n" - assert res[0][1].output.stdout == "pydra\n" + outputs = shelly(plugin=plugin) - 
assert res[1][0].output.stdout == "nipype" - assert res[1][1].output.stdout == "pydra" + assert outputs.stdout == [["nipype\n", "pydra\n"], ["nipype", "pydra"]] # tests with workflows @@ -251,31 +236,32 @@ def test_shell_cmd_7(plugin, tmp_path): def test_wf_shell_cmd_1(plugin, tmp_path): """a workflow with two connected commands""" - wf = Workflow(name="wf", input_spec=["cmd1", "cmd2"]) - wf.inputs.cmd1 = "pwd" - wf.inputs.cmd2 = "ls" - wf.add(ShellCommandTask(name="shelly_pwd", executable=wf.lzin.cmd1, strip=True)) - wf.add( - ShellCommandTask( - name="shelly_ls", executable=wf.lzin.cmd2, args=wf.shelly_pwd.lzout.stdout - ) - ) - wf.set_output([("out", wf.shelly_ls.lzout.stdout)]) - wf.cache_dir = tmp_path + @workflow.define + def Workflow(cmd1, cmd2): + shelly_pwd = workflow.add(shell.define(cmd1)()) - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) + @python.define + def StripAndListify(x: str) -> list[str]: + return [x.strip()] + + listify = workflow.add(StripAndListify(x=shelly_pwd.stdout)) + shelly_ls = workflow.add(shell.define(cmd2)(additional_args=listify.out)) + return shelly_ls.stdout - res = wf.result() - assert "_result.pklz" in res.output.out - assert "_task.pklz" in res.output.out + wf = Workflow(cmd1="pwd", cmd2="ls") + with Submitter(plugin=plugin, cache_dir=tmp_path) as sub: + res = sub(wf) -# customised input spec + assert "_result.pklz" in res.outputs.out + assert "_task.pklz" in res.outputs.out -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +# customised input definition + + +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_1(plugin, results_function, tmp_path): """a command with executable, args and one command opt, using a customized input_spec to add the opt to the command @@ -283,39 +269,31 @@ def test_shell_cmd_inputspec_1(plugin, results_function, tmp_path): """ cmd_exec = "echo" cmd_opt = True - cmd_args = "hello from pydra" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "opt_n", - attr.ib( - type=bool, - metadata={"position": 1, "argstr": "-n", "help_string": "option"}, - ), - ) - ], - bases=(ShellSpec,), - ) + cmd_args = ["hello from pydra"] + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + opt_n: bool = shell.arg( + position=1, + argstr="-n", + help="option", + ) + + class Outputs(ShellOutputs): + pass # separate command into exec + args - shelly = ShellCommandTask( - name="shelly", - executable=cmd_exec, - args=cmd_args, - opt_n=cmd_opt, - input_spec=my_input_spec, - cache_dir=tmp_path, - ) - assert shelly.inputs.executable == cmd_exec - assert shelly.inputs.args == cmd_args + shelly = Shelly(additional_args=cmd_args, opt_n=cmd_opt) + assert shelly.executable == cmd_exec + assert shelly.additional_args == cmd_args assert shelly.cmdline == "echo -n 'hello from pydra'" - res = results_function(shelly, plugin) - assert res.output.stdout == "hello from pydra" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "hello from pydra" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_2(plugin, results_function, tmp_path): """a command with executable, args and two command options, using a customized input_spec to add the opt to the command @@ -324,413 +302,296 @@ def test_shell_cmd_inputspec_2(plugin, results_function, 
tmp_path): cmd_exec = "echo" cmd_opt = True cmd_opt_hello = "HELLO" - cmd_args = "from pydra" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "opt_hello", - attr.ib( - type=str, - metadata={"position": 3, "help_string": "todo", "argstr": ""}, - ), - ), - ( - "opt_n", - attr.ib( - type=bool, - metadata={"position": 1, "help_string": "todo", "argstr": "-n"}, - ), - ), - ], - bases=(ShellSpec,), - ) + cmd_args = ["from pydra"] + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + opt_hello: str = shell.arg( + position=3, + help="todo", + argstr="", + ) + opt_n: bool = shell.arg( + position=1, + help="todo", + argstr="-n", + ) + + class Outputs(ShellOutputs): + pass # separate command into exec + args - shelly = ShellCommandTask( - name="shelly", - executable=cmd_exec, - args=cmd_args, - opt_n=cmd_opt, - opt_hello=cmd_opt_hello, - input_spec=my_input_spec, - cache_dir=tmp_path, - ) - assert shelly.inputs.executable == cmd_exec - assert shelly.inputs.args == cmd_args + shelly = Shelly(additional_args=cmd_args, opt_n=cmd_opt, opt_hello=cmd_opt_hello) + assert shelly.executable == cmd_exec + assert shelly.additional_args == cmd_args assert shelly.cmdline == "echo -n HELLO 'from pydra'" - res = results_function(shelly, plugin) - assert res.output.stdout == "HELLO from pydra" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "HELLO from pydra" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_3(plugin, results_function, tmp_path): """mandatory field added to fields, value provided""" cmd_exec = "echo" hello = "HELLO" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "text", - "mandatory": True, - "argstr": "", - }, - ), - ) - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: str = shell.arg( + position=1, + help="text", + argstr="", + ) + + class Outputs(ShellOutputs): + pass # separate command into exec + args - shelly = ShellCommandTask( - name="shelly", - executable=cmd_exec, - text=hello, - input_spec=my_input_spec, - cache_dir=tmp_path, - ) - assert shelly.inputs.executable == cmd_exec + shelly = Shelly(text=hello) + assert shelly.executable == cmd_exec assert shelly.cmdline == "echo HELLO" - res = results_function(shelly, plugin) - assert res.output.stdout == "HELLO\n" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "HELLO\n" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_3a(plugin, results_function, tmp_path): """mandatory field added to fields, value provided - using shorter syntax for input spec (no attr.ib) + using shorter syntax for input (no attr.ib) """ cmd_exec = "echo" hello = "HELLO" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - str, - {"position": 1, "help_string": "text", "mandatory": True, "argstr": ""}, - ) - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: str = shell.arg(position=1, help="text", argstr="") + + class Outputs(ShellOutputs): + pass # separate command into exec + args - 
shelly = ShellCommandTask( - name="shelly", - executable=cmd_exec, - text=hello, - input_spec=my_input_spec, - cache_dir=tmp_path, - ) - assert shelly.inputs.executable == cmd_exec + shelly = Shelly(text=hello) + assert shelly.executable == cmd_exec assert shelly.cmdline == "echo HELLO" - res = results_function(shelly, plugin) - assert res.output.stdout == "HELLO\n" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "HELLO\n" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_3b(plugin, results_function, tmp_path): """mandatory field added to fields, value provided after init""" cmd_exec = "echo" hello = "HELLO" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "text", - "mandatory": True, - "argstr": "", - }, - ), - ) - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: str = shell.arg( + position=1, + help="text", + argstr="", + ) + + class Outputs(ShellOutputs): + pass # separate command into exec + args - shelly = ShellCommandTask( - name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path - ) - shelly.inputs.text = hello + shelly = Shelly(executable=cmd_exec) + shelly.text = hello - assert shelly.inputs.executable == cmd_exec + assert shelly.executable == cmd_exec assert shelly.cmdline == "echo HELLO" - res = results_function(shelly, plugin) - assert res.output.stdout == "HELLO\n" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "HELLO\n" def test_shell_cmd_inputspec_3c_exception(plugin, tmp_path): """mandatory field added to fields, value is not provided, so exception is raised""" cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "text", - "mandatory": True, - "argstr": "", - }, - ), - ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: str = shell.arg( + position=1, + help="text", + argstr="", + ) - with pytest.raises(Exception) as excinfo: - shelly() - assert "mandatory" in str(excinfo.value) + class Outputs(ShellOutputs): + pass + + shelly = Shelly(executable=cmd_exec) + with pytest.raises(ValueError, match="Mandatory field 'text' is not set"): + shelly(cache_dir=tmp_path) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) + +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_3c(plugin, results_function, tmp_path): """mandatory=False, so tasks runs fine even without the value""" cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=ty.Optional[str], - default=None, - metadata={ - "position": 1, - "help_string": "text", - "mandatory": False, - "argstr": "", - }, - ), - ) - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: ty.Optional[str] = shell.arg( + default=None, + position=1, + help="text", + argstr="", + ) + + class Outputs(ShellOutputs): + pass # 
separate command into exec + args - shelly = ShellCommandTask( - name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path - ) + shelly = Shelly(executable=cmd_exec) - assert shelly.inputs.executable == cmd_exec + assert shelly.executable == cmd_exec assert shelly.cmdline == "echo" - res = results_function(shelly, plugin) - assert res.output.stdout == "\n" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "\n" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_4(plugin, results_function, tmp_path): """mandatory field added to fields, value provided""" cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=str, - default="Hello", - metadata={"position": 1, "help_string": "text", "argstr": ""}, - ), - ) - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: str = shell.arg( + default="Hello", + position=1, + help="text", + argstr="", + ) + + class Outputs(ShellOutputs): + pass # separate command into exec + args - shelly = ShellCommandTask( - name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path - ) + shelly = Shelly(executable=cmd_exec) - assert shelly.inputs.executable == cmd_exec + assert shelly.executable == cmd_exec assert shelly.cmdline == "echo Hello" - res = results_function(shelly, plugin) - assert res.output.stdout == "Hello\n" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "Hello\n" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_4a(plugin, results_function, tmp_path): """mandatory field added to fields, value provided - using shorter syntax for input spec (no attr.ib) + using shorter syntax for input (no attr.ib) """ cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ("text", str, "Hello", {"position": 1, "help_string": "text", "argstr": ""}) - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: str = shell.arg(default="Hello", position=1, help="text", argstr="") + + class Outputs(ShellOutputs): + pass # separate command into exec + args - shelly = ShellCommandTask( - name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path - ) + shelly = Shelly(executable=cmd_exec) - assert shelly.inputs.executable == cmd_exec + assert shelly.executable == cmd_exec assert shelly.cmdline == "echo Hello" - res = results_function(shelly, plugin) - assert res.output.stdout == "Hello\n" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "Hello\n" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_4b(plugin, results_function, tmp_path): """mandatory field added to fields, value provided""" cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=str, - default="Hi", - metadata={"position": 1, "help_string": "text", "argstr": ""}, - ), - ) - ], - bases=(ShellSpec,), - ) + + @shell.define + 
class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: str = shell.arg( + default="Hi", + position=1, + help="text", + argstr="", + ) + + class Outputs(ShellOutputs): + pass # separate command into exec + args - shelly = ShellCommandTask( - name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path - ) + shelly = Shelly(executable=cmd_exec) - assert shelly.inputs.executable == cmd_exec + assert shelly.executable == cmd_exec assert shelly.cmdline == "echo Hi" - res = results_function(shelly, plugin) - assert res.output.stdout == "Hi\n" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "Hi\n" -def test_shell_cmd_inputspec_4c_exception(plugin): +def test_shell_cmd_inputspec_4d_exception(plugin): """mandatory field added to fields, value provided""" cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=str, - default="Hello", - metadata={ - "position": 1, - "help_string": "text", - "mandatory": True, - "argstr": "", - }, - ), - ) - ], - bases=(ShellSpec,), - ) # separate command into exec + args with pytest.raises( - Exception, match=r"default value \('Hello'\) should not be set when the field" + ValueError, + match=r"path_template \('exception'\) can only be provided when there is no default", ): - ShellCommandTask(name="shelly", executable=cmd_exec, input_spec=my_input_spec) - -def test_shell_cmd_inputspec_4d_exception(plugin): - """mandatory field added to fields, value provided""" - cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=str, - default="Hello", - metadata={ - "position": 1, - "help_string": "text", - "output_file_template": "exception", - "argstr": "", - }, - ), + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: File = shell.outarg( + default="Hello", + position=1, + help="text", + path_template="exception", + argstr="", ) - ], - bases=(ShellSpec,), - ) - # separate command into exec + args - with pytest.raises( - Exception, match=r"default value \('Hello'\) should not be set together" - ) as excinfo: - ShellCommandTask(name="shelly", executable=cmd_exec, input_spec=my_input_spec) + class Outputs(ShellOutputs): + pass -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_5_nosubm(plugin, results_function, tmp_path): """checking xor in metadata: task should work fine, since only one option is True""" cmd_exec = "ls" cmd_t = True - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "opt_t", - attr.ib( - type=bool, - metadata={ - "position": 1, - "help_string": "opt t", - "argstr": "-t", - "xor": ["opt_S"], - }, - ), - ), - ( - "opt_S", - attr.ib( - type=bool, - metadata={ - "position": 2, - "help_string": "opt S", - "argstr": "-S", - "xor": ["opt_t"], - }, - ), - ), - ], - bases=(ShellSpec,), - ) + + @shell.define(xor=["opt_S", "opt_t"]) + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + opt_t: bool = shell.arg( + position=1, + help="opt t", + argstr="-t", + ) + opt_S: bool = shell.arg( + default=False, + position=2, + help="opt S", + argstr="-S", + ) + + class Outputs(ShellOutputs): + pass # separate command into exec + args - shelly = ShellCommandTask( - name="shelly", - executable=cmd_exec, - opt_t=cmd_t, - input_spec=my_input_spec, - cache_dir=tmp_path, - ) - assert 
shelly.inputs.executable == cmd_exec + shelly = Shelly(opt_t=cmd_t) + assert shelly.executable == cmd_exec assert shelly.cmdline == "ls -t" - results_function(shelly, plugin) + results_function(shelly, plugin=plugin, cache_dir=tmp_path) def test_shell_cmd_inputspec_5a_exception(plugin, tmp_path): @@ -738,51 +599,30 @@ def test_shell_cmd_inputspec_5a_exception(plugin, tmp_path): cmd_exec = "ls" cmd_t = True cmd_S = True - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "opt_t", - attr.ib( - type=bool, - metadata={ - "position": 1, - "help_string": "opt t", - "argstr": "-t", - "xor": ["opt_S"], - }, - ), - ), - ( - "opt_S", - attr.ib( - type=bool, - metadata={ - "position": 2, - "help_string": "opt S", - "argstr": "-S", - "xor": ["opt_t"], - }, - ), - ), - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd_exec, - opt_t=cmd_t, - opt_S=cmd_S, - input_spec=my_input_spec, - cache_dir=tmp_path, - ) - with pytest.raises(Exception) as excinfo: - shelly() - assert "is mutually exclusive" in str(excinfo.value) + @shell.define(xor=["opt_S", "opt_t"]) + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + opt_t: bool = shell.arg( + position=1, + help="opt t", + argstr="-t", + ) + opt_S: bool = shell.arg( + position=2, + help="opt S", + argstr="-S", + ) + + class Outputs(ShellOutputs): + pass + shelly = Shelly(opt_t=cmd_t, opt_S=cmd_S) + with pytest.raises(ValueError, match="Mutually exclusive fields"): + shelly(cache_dir=tmp_path) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) + +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_6(plugin, results_function, tmp_path): """checking requires in metadata: the required field is set in the init, so the task works fine @@ -790,87 +630,65 @@ def test_shell_cmd_inputspec_6(plugin, results_function, tmp_path): cmd_exec = "ls" cmd_l = True cmd_t = True - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "opt_t", - attr.ib( - type=bool, - metadata={ - "position": 2, - "help_string": "opt t", - "argstr": "-t", - "requires": ["opt_l"], - }, - ), - ), - ( - "opt_l", - attr.ib( - type=bool, - metadata={"position": 1, "help_string": "opt l", "argstr": "-l"}, - ), - ), - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + opt_t: bool = shell.arg( + position=2, + help="opt t", + argstr="-t", + requires=["opt_l"], + ) + opt_l: bool = shell.arg( + position=1, + help="opt l", + argstr="-l", + ) + + class Outputs(ShellOutputs): + pass # separate command into exec + args - shelly = ShellCommandTask( - name="shelly", - executable=cmd_exec, - opt_t=cmd_t, - opt_l=cmd_l, - input_spec=my_input_spec, - cache_dir=tmp_path, - ) - assert shelly.inputs.executable == cmd_exec + shelly = Shelly(opt_t=cmd_t, opt_l=cmd_l) + assert shelly.executable == cmd_exec assert shelly.cmdline == "ls -l -t" - results_function(shelly, plugin) + results_function(shelly, plugin=plugin, cache_dir=tmp_path) -def test_shell_cmd_inputspec_6a_exception(plugin): +def test_shell_cmd_inputspec_6a_exception(plugin, tmp_path): """checking requires in metadata: the required field is None, so the task works raises exception """ cmd_exec = "ls" cmd_t = True - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "opt_t", - attr.ib( - type=bool, - metadata={ - "position": 2, - "help_string": "opt t", - "argstr": "-t", - "requires": ["opt_l"], - }, - ), - ), - ( - "opt_l", - 
attr.ib( - type=bool, - metadata={"position": 1, "help_string": "opt l", "argstr": "-l"}, - ), - ), - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - name="shelly", executable=cmd_exec, opt_t=cmd_t, input_spec=my_input_spec - ) - with pytest.raises(Exception) as excinfo: + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + opt_t: bool = shell.arg( + position=2, + help="opt t", + argstr="-t", + requires=["opt_l"], + ) + opt_l: bool = shell.arg( + default=False, + position=1, + help="opt l", + argstr="-l", + ) + + class Outputs(ShellOutputs): + pass + + shelly = Shelly(executable=cmd_exec, opt_t=cmd_t) + + with pytest.raises(ValueError, match=r"'opt_t' requires \['opt_l'\]"): shelly() - assert "requires" in str(excinfo.value) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_6b(plugin, results_function, tmp_path): """checking requires in metadata: the required field set after the init @@ -878,134 +696,69 @@ def test_shell_cmd_inputspec_6b(plugin, results_function, tmp_path): cmd_exec = "ls" cmd_l = True cmd_t = True - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "opt_t", - attr.ib( - type=bool, - metadata={ - "position": 2, - "help_string": "opt t", - "argstr": "-t", - "requires": ["opt_l"], - }, - ), - ), - ( - "opt_l", - attr.ib( - type=bool, - metadata={"position": 1, "help_string": "opt l", "argstr": "-l"}, - ), - ), - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + opt_t: bool = shell.arg( + position=2, + help="opt t", + argstr="-t", + requires=["opt_l"], + ) + opt_l: bool = shell.arg( + position=1, + help="opt l", + argstr="-l", + ) + + class Outputs(ShellOutputs): + pass # separate command into exec + args - shelly = ShellCommandTask( - name="shelly", - executable=cmd_exec, - opt_t=cmd_t, + shelly = Shelly( + opt_t=cmd_t # opt_l=cmd_l, - input_spec=my_input_spec, - cache_dir=tmp_path, ) - shelly.inputs.opt_l = cmd_l - assert shelly.inputs.executable == cmd_exec + shelly.opt_l = cmd_l + assert shelly.executable == cmd_exec assert shelly.cmdline == "ls -l -t" - results_function(shelly, plugin) + results_function(shelly, plugin=plugin, cache_dir=tmp_path) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_7(plugin, results_function, tmp_path): """ providing output name using input_spec, using name_tamplate in metadata """ cmd = "touch" - args = "newfile_tmp.txt" - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out1", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}", - "help_string": "output file", - }, - ), - ) - ], - bases=(ShellSpec,), - ) - - shelly = ShellCommandTask( - name="shelly", - executable=cmd, - args=args, - input_spec=my_input_spec, - cache_dir=tmp_path, - ) + arg = "newfile_tmp.txt" - res = results_function(shelly, plugin) - assert res.output.stdout == "" - out1 = res.output.out1.fspath - assert out1.exists() - # checking if the file is created in a good place - assert shelly.output_dir == out1.parent - assert out1.name == "newfile_tmp.txt" + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + arg: str = shell.arg(argstr=None) -@pytest.mark.parametrize("results_function", [result_no_submitter, 
result_submitter]) -def test_shell_cmd_inputspec_7a(plugin, results_function, tmp_path): - """ - providing output name using input_spec, - using name_tamplate in metadata - and changing the output name for output_spec using output_field_name - """ - cmd = "touch" - args = "newfile_tmp.txt" - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out1", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}", - "output_field_name": "out1_changed", - "help_string": "output file", - }, - ), + class Outputs(ShellOutputs): + out1: File = shell.outarg( + path_template="{arg}", + help="output file", ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd, - args=args, - input_spec=my_input_spec, - cache_dir=tmp_path, - ) + shelly = Shelly(executable=cmd, arg=arg) - res = results_function(shelly, plugin) - assert res.output.stdout == "" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + out1 = outputs.out1.fspath + assert out1.exists() # checking if the file is created in a good place - assert shelly.output_dir == res.output.out1_changed.fspath.parent - assert res.output.out1_changed.fspath.name == "newfile_tmp.txt" + assert out1.parent.parent == tmp_path + assert out1.name == "newfile_tmp.txt" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_7b(plugin, results_function, tmp_path): """ providing new file and output name using input_spec, @@ -1013,85 +766,59 @@ def test_shell_cmd_inputspec_7b(plugin, results_function, tmp_path): """ cmd = "touch" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "newfile", - attr.ib( - type=str, - metadata={"position": 1, "help_string": "new file", "argstr": ""}, - ), - ), - ( - "out1", - attr.ib( - type=str, - metadata={ - "output_file_template": "{newfile}", - "help_string": "output file", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + newfile: str = shell.arg( + position=1, + help="new file", + argstr="", + ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd, - newfile="newfile_tmp.txt", - input_spec=my_input_spec, - cache_dir=tmp_path, - ) + class Outputs(ShellOutputs): + out1: File = shell.outarg( + path_template="{newfile}", + help="output file", + ) + + shelly = Shelly(executable=cmd, newfile=File.mock("newfile_tmp.txt")) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.out1.fspath.exists() + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.out1.fspath.exists() -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_7c(plugin, results_function, tmp_path): """ providing output name using input_spec, using name_tamplate with txt extension (extension from args should be removed """ cmd = "touch" - args = "newfile_tmp.txt" - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out1", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}.txt", - "help_string": "output file", - }, - ), + arg = File.mock("newfile_tmp.txt") + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + + arg = 
shell.arg(argstr=None) + + class Outputs(ShellOutputs): + out1: File = shell.outarg( + path_template="{arg}.txt", + help="output file", ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd, - args=args, - input_spec=my_input_spec, - cache_dir=tmp_path, - ) + shelly = Shelly(executable=cmd, arg=arg) - res = results_function(shelly, plugin) - assert res.output.stdout == "" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" # checking if the file is created in a good place - assert shelly.output_dir == res.output.out1.fspath.parent - assert res.output.out1.fspath.name == "newfile_tmp.txt" + assert outputs.out1.fspath.parent.parent == tmp_path + assert outputs.out1.fspath.name == "newfile_tmp.txt" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_8(plugin, results_function, tmp_path): """ providing new file and output name using input_spec, @@ -1099,56 +826,38 @@ def test_shell_cmd_inputspec_8(plugin, results_function, tmp_path): """ cmd = "touch" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "newfile", - attr.ib( - type=str, - metadata={"position": 2, "help_string": "new file", "argstr": ""}, - ), - ), - ( - "time", - attr.ib( - type=str, - metadata={ - "position": 1, - "argstr": "-t", - "help_string": "time of modif.", - }, - ), - ), - ( - "out1", - attr.ib( - type=str, - metadata={ - "output_file_template": "{newfile}", - "help_string": "output file", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + newfile: str = shell.arg( + position=2, + help="new file", + argstr="", + ) + time: str = shell.arg( + position=1, + argstr="-t", + help="time of modif.", + ) + + class Outputs(ShellOutputs): + out1: File = shell.outarg( + path_template="{newfile}", + help="output file", + ) - shelly = ShellCommandTask( - name="shelly", + shelly = Shelly( executable=cmd, - newfile="newfile_tmp.txt", + newfile=File.mock("newfile_tmp.txt"), time="02121010", - input_spec=my_input_spec, - cache_dir=tmp_path, ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.out1.fspath.exists() + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.out1.fspath.exists() -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_8a(plugin, results_function, tmp_path): """ providing new file and output name using input_spec, @@ -1156,59 +865,41 @@ def test_shell_cmd_inputspec_8a(plugin, results_function, tmp_path): """ cmd = "touch" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "newfile", - attr.ib( - type=str, - metadata={"position": 2, "help_string": "new file", "argstr": ""}, - ), - ), - ( - "time", - attr.ib( - type=str, - metadata={ - "position": 1, - "argstr": "-t {time}", - "help_string": "time of modif.", - }, - ), - ), - ( - "out1", - attr.ib( - type=str, - metadata={ - "output_file_template": "{newfile}", - "help_string": "output file", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + newfile: str = shell.arg( + position=2, + help="new file", + argstr="", + ) + 
time: str = shell.arg( + position=1, + argstr="-t {time}", + help="time of modif.", + ) + + class Outputs(ShellOutputs): + out1: File = shell.outarg( + path_template="{newfile}", + help="output file", + ) - shelly = ShellCommandTask( - name="shelly", + shelly = Shelly( executable=cmd, - newfile="newfile_tmp.txt", + newfile=File.mock("newfile_tmp.txt"), time="02121010", - input_spec=my_input_spec, - cache_dir=tmp_path, ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.out1.fspath.exists() + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.out1.fspath.exists() -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_9(tmp_path, plugin, results_function): """ - providing output name using input_spec (output_file_template in metadata), + providing output name using input_spec (path_template in metadata), the template has a suffix, the extension of the file will be moved to the end """ cmd = "cp" @@ -1217,51 +908,39 @@ def test_shell_cmd_inputspec_9(tmp_path, plugin, results_function): file = ddir / ("file.txt") file.write_text("content\n") - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file_orig", - attr.ib( - type=File, - metadata={"position": 2, "help_string": "new file", "argstr": ""}, - ), - ), - ( - "file_copy", - attr.ib( - type=str, - metadata={ - "output_file_template": "{file_orig}_copy", - "help_string": "output file", - "argstr": "", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file_orig: File = shell.arg( + position=1, + help="new file", + argstr="", + ) - shelly = ShellCommandTask( - name="shelly", + class Outputs(ShellOutputs): + file_copy: File = shell.outarg( + path_template="{file_orig}_copy", + help="output file", + argstr="", + ) + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, file_orig=file, - cache_dir=tmp_path, ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.file_copy.fspath.exists() - assert res.output.file_copy.fspath.name == "file_copy.txt" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.file_copy.fspath.exists() + assert outputs.file_copy.fspath.name == "file_copy.txt" # checking if it's created in a good place - assert shelly.output_dir == res.output.file_copy.fspath.parent + assert outputs.file_copy.fspath.parent.parent == tmp_path -@pytest.mark.parametrize("results_function", [result_no_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter]) def test_shell_cmd_inputspec_9a(tmp_path, plugin, results_function): """ - providing output name using input_spec (output_file_template in metadata), + providing output name using input_spec (path_template in metadata), the template has a suffix, the extension of the file will be moved to the end the change: input file has directory with a dot """ @@ -1270,97 +949,74 @@ def test_shell_cmd_inputspec_9a(tmp_path, plugin, results_function): file.parent.mkdir() file.write_text("content\n") - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file_orig", - attr.ib( - type=File, - metadata={"position": 2, "help_string": "new file", "argstr": ""}, - ), - ), - ( - "file_copy", - attr.ib( - type=str, - metadata={ - 
"output_file_template": "{file_orig}_copy", - "help_string": "output file", - "argstr": "", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file_orig: File = shell.arg( + position=1, + help="new file", + argstr="", + ) - shelly = ShellCommandTask( - name="shelly", executable=cmd, input_spec=my_input_spec, file_orig=file - ) + class Outputs(ShellOutputs): + file_copy: File = shell.outarg( + path_template="{file_orig}_copy", + help="output file", + argstr="", + ) + + shelly = Shelly(executable=cmd, file_orig=file) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.file_copy.fspath.exists() - assert res.output.file_copy.fspath.name == "file_copy.txt" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.file_copy.fspath.exists() + assert outputs.file_copy.fspath.name == "file_copy.txt" # checking if it's created in a good place - assert shelly.output_dir == res.output.file_copy.fspath.parent + assert outputs.file_copy.fspath.parent.parent == tmp_path -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_9b(tmp_path, plugin, results_function): """ - providing output name using input_spec (output_file_template in metadata) + providing output name using input_spec (path_template in metadata) and the keep_extension is set to False, so the extension is removed completely. """ cmd = "cp" file = tmp_path / "file.txt" file.write_text("content\n") - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file_orig", - attr.ib( - type=File, - metadata={"position": 2, "help_string": "new file", "argstr": ""}, - ), - ), - ( - "file_copy", - attr.ib( - type=str, - metadata={ - "output_file_template": "{file_orig}_copy", - "keep_extension": False, - "help_string": "output file", - "argstr": "", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file_orig: File = shell.arg( + position=1, + help="new file", + argstr="", + ) + + class Outputs(ShellOutputs): + file_copy: File = shell.outarg( + path_template="{file_orig}_copy", + keep_extension=False, + help="output file", + argstr="", + ) - shelly = ShellCommandTask( - name="shelly", + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, file_orig=file, - cache_dir=tmp_path, ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.file_copy.fspath.exists() - assert res.output.file_copy.fspath.name == "file_copy" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.file_copy.fspath.exists() + assert outputs.file_copy.fspath.name == "file_copy" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_9c(tmp_path, plugin, results_function): """ - providing output name using input_spec (output_file_template in metadata) + providing output name using input_spec (path_template in metadata) and the keep_extension is set to False, so the extension is removed completely, no suffix in the template. 
""" @@ -1368,52 +1024,37 @@ def test_shell_cmd_inputspec_9c(tmp_path, plugin, results_function): file = tmp_path / "file.txt" file.write_text("content\n") - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file_orig", - attr.ib( - type=File, - metadata={"position": 2, "help_string": "new file", "argstr": ""}, - ), - ), - ( - "file_copy", - attr.ib( - type=str, - metadata={ - "output_file_template": "{file_orig}", - "keep_extension": False, - "help_string": "output file", - "argstr": "", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file_orig: File = shell.arg( + position=1, + help="new file", + argstr="", + ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - file_orig=file, - cache_dir=tmp_path, - ) + class Outputs(ShellOutputs): + file_copy: File = shell.outarg( + path_template="{file_orig}", + keep_extension=False, + help="output file", + argstr="", + ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.file_copy.fspath.exists() - assert res.output.file_copy.fspath.name == "file" - assert res.output.file_copy.fspath.parent == shelly.output_dir + shelly = Shelly(executable=cmd, file_orig=file) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.file_copy.fspath.exists() + assert outputs.file_copy.fspath.name == "file" + assert outputs.file_copy.fspath.parent.parent == tmp_path -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) + +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_9d(tmp_path, plugin, results_function): """ providing output name explicitly by manually setting value in input_spec - (instead of using default provided byoutput_file_template in metadata) + (instead of using default provided bypath_template in metadata) """ cmd = "cp" ddir = tmp_path / "data_inp" @@ -1421,49 +1062,37 @@ def test_shell_cmd_inputspec_9d(tmp_path, plugin, results_function): file = ddir / ("file.txt") file.write_text("content\n") - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file_orig", - attr.ib( - type=File, - metadata={"position": 2, "help_string": "new file", "argstr": ""}, - ), - ), - ( - "file_copy", - attr.ib( - type=str, - metadata={ - "output_file_template": "{file_orig}_copy", - "help_string": "output file", - "argstr": "", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file_orig: File = shell.arg( + position=1, + help="new file", + argstr="", + ) - shelly = ShellCommandTask( - name="shelly", + class Outputs(ShellOutputs): + file_copy: File = shell.outarg( + path_template="{file_orig}_copy", + help="output file", + argstr="", + ) + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, file_orig=file, file_copy="my_file_copy.txt", - cache_dir=tmp_path, ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.file_copy.fspath.exists() - assert res.output.file_copy.fspath.name == "my_file_copy.txt" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.file_copy.fspath.exists() + assert outputs.file_copy.fspath.name == "my_file_copy.txt" # checking if it's created in a good place - assert shelly.output_dir == res.output.file_copy.fspath.parent + assert 
outputs.file_copy.fspath.parent.parent == tmp_path -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_10(plugin, results_function, tmp_path): """using input_spec, providing list of files as an input""" @@ -1477,37 +1106,26 @@ def test_shell_cmd_inputspec_10(plugin, results_function, tmp_path): cmd_exec = "cat" files_list = [file_1, file_2] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "files", - attr.ib( - type=ty.List[File], - metadata={ - "position": 1, - "argstr": "...", - "sep": " ", - "help_string": "list of files", - "mandatory": True, - }, - ), - ) - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + files: ty.List[File] = shell.arg( + position=1, + argstr="...", + sep=" ", + help="list of files", + ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd_exec, + class Outputs(ShellOutputs): + pass + + shelly = Shelly( files=files_list, - input_spec=my_input_spec, - cache_dir=tmp_path, ) - assert shelly.inputs.executable == cmd_exec - res = results_function(shelly, plugin) - assert res.output.stdout == "hello from boston" + assert shelly.executable == cmd_exec + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "hello from boston" def test_shell_cmd_inputspec_10_err(tmp_path): @@ -1526,91 +1144,65 @@ def test_shell_cmd_inputspec_10_err(tmp_path): cmd_exec = "cat" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "files", - attr.ib( - type=File, - metadata={ - "position": 1, - "argstr": "", - "help_string": "a file", - "mandatory": True, - }, - ), - ) - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + files: File = shell.arg( + position=1, + argstr="", + help="a file", + ) + + class Outputs(ShellOutputs): + pass with pytest.raises(FileNotFoundError): - shelly = ShellCommandTask( - name="shelly", executable=cmd_exec, files=file_2, input_spec=my_input_spec - ) + Shelly(executable=cmd_exec, files=file_2) def test_shell_cmd_inputspec_11(tmp_path): - input_fields = [ - ( - "inputFiles", - attr.ib( - type=MultiInputObj[str], - metadata={ - "argstr": "...", - "help_string": "The list of input image files to be segmented.", - }, - ), - ) - ] - output_fields = [ - ( - "outputFiles", - attr.ib( - type=MultiOutputFile, - metadata={ - "help_string": "Corrected Output Images: should specify the same number of images as inputVolume, if only one element is given, then it is used as a file pattern where %s is replaced by the imageVolumeType, and %d by the index list location.", - "output_file_template": "{inputFiles}", - }, - ), + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + inputFiles: MultiInputObj[str] = shell.arg( + argstr="...", + help="The list of input image files to be segmented.", ) - ] - input_spec = SpecInfo(name="Input", fields=input_fields, bases=(ShellSpec,)) - output_spec = SpecInfo(name="Output", fields=output_fields, bases=(ShellOutSpec,)) + executable = "touch" - task = ShellCommandTask( - name="echoMultiple", - executable="touch", - input_spec=input_spec, - output_spec=output_spec, - ) + class Outputs(ShellOutputs): + outputFiles: MultiOutputFile = shell.outarg( + help="""Corrected Output Images: should specify the same number of + images as inputVolume, if only one element is given, then it is used as + a file 
pattern where %s is replaced by the imageVolumeType, + and %d by the index list location.""", + path_template="{inputFiles}", + ) - wf = Workflow(name="wf", input_spec=["inputFiles"], inputFiles=["test1", "test2"]) + @workflow.define + def Workflow(inputFiles): - task.inputs.inputFiles = wf.lzin.inputFiles + echoMultiple = workflow.add(Shelly(inputFiles=inputFiles)) + return echoMultiple.outputFiles - wf.add(task) - wf.set_output([("out", wf.echoMultiple.lzout.outputFiles)]) + wf = Workflow(inputFiles=[File.mock("test1"), File.mock("test2")]) # XXX: Figure out why this fails with "cf". Occurs in CI when using Ubuntu + Python >= 3.10 # (but not when using macOS + Python >= 3.10). Same error occurs in test_shell_cmd_outputspec_7a # see https://github.com/nipype/pydra/issues/671 - with Submitter(plugin="serial") as sub: - sub(wf) - result = wf.result() + with Submitter(worker="debug") as sub: + result = sub(wf) - for out_file in result.output.out: + for out_file in result.outputs.out: assert out_file.fspath.name == "test1" or out_file.fspath.name == "test2" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_12(tmp_path: Path, plugin, results_function): """ providing output name using input_spec - output_file_template is provided as a function that returns + path_template is provided as a function that returns various templates depending on the values of inputs fields """ cmd = "cp" @@ -1625,94 +1217,78 @@ def template_function(inputs): else: return "{file_orig}_odd" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file_orig", - attr.ib( - type=File, - metadata={"position": 2, "help_string": "new file", "argstr": ""}, - ), - ), - ( - "number", - attr.ib( - type=int, - metadata={"help_string": "a number", "mandatory": True}, - ), - ), - ( - "file_copy", - attr.ib( - type=str, - metadata={ - "output_file_template": template_function, - "help_string": "output file", - "argstr": "", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file_orig: File = shell.arg( + position=2, + help="new file", + argstr="", + ) + number: int = shell.arg( + help="a number", + argstr=None, + ) + + class Outputs(ShellOutputs): + file_copy: File = shell.outarg( + path_template=template_function, + help="output file", + argstr="", + ) - shelly = ShellCommandTask( - name="shelly", + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, file_orig=file, number=2, - cache_dir=tmp_path, ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - fspath = res.output.file_copy.fspath + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + fspath = outputs.file_copy.fspath assert fspath.exists() assert fspath.name == "file_even.txt" # checking if it's created in a good place - assert shelly.output_dir == fspath.parent + assert fspath.parent.parent == tmp_path def test_shell_cmd_inputspec_with_iterable(): """Test formatting of argstr with different iterable types.""" - input_spec = SpecInfo( - name="Input", - fields=[ - ( - "iterable_1", - ty.Iterable[int], - { - "help_string": "iterable input 1", - "argstr": "--in1", - }, - ), - ( - "iterable_2", - ty.Iterable[str], - { - "help_string": "iterable input 2", - "argstr": "--in2...", - }, - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class 
Shelly(ShellDef["Shelly.Outputs"]): + executable = "test" + iterable_1: list[int] = shell.arg( + help="iterable input 1", + argstr="--in1", + sep=" ", + ) + iterable_2: set[str] = shell.arg( + help="iterable input 2", + argstr="--in2", + sep=" ", + ) + iterable_3: tuple[float, ...] = shell.arg( + help="iterable input 3", + argstr="--in3...", + ) + + class Outputs(ShellOutputs): + pass - task = ShellCommandTask(name="test", input_spec=input_spec, executable="test") + task = Shelly() for iterable_type in (list, tuple): - task.inputs.iterable_1 = iterable_type(range(3)) - task.inputs.iterable_2 = iterable_type(["bar", "foo"]) - assert task.cmdline == "test --in1 0 1 2 --in2 bar --in2 foo" + task.iterable_1 = iterable_type(range(3)) + task.iterable_2 = iterable_type(["foo"]) + task.iterable_3 = iterable_type([1, 0]) + assert task.cmdline == "test --in1 0 1 2 --in2 foo --in3 1.0 --in3 0.0" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_copyfile_1(plugin, results_function, tmp_path): """shelltask changes a file in place, - adding copyfile=True to the file-input from input_spec + adding copy_mode="copy" to the file-input from input_spec hardlink or copy in the output_dir should be created """ file = tmp_path / "file_pydra.txt" @@ -1721,57 +1297,35 @@ def test_shell_cmd_inputspec_copyfile_1(plugin, results_function, tmp_path): cmd = ["sed", "-is", "s/hello/hi/"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=File, - metadata={ - "position": 1, - "argstr": "", - "help_string": "orig file", - "mandatory": True, - "copyfile": True, - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{orig_file}", - "help_string": "output file", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + orig_file: File = shell.arg( + help="orig file", + copy_mode="copy", + ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - orig_file=str(file), - cache_dir=tmp_path, - ) + class Outputs(ShellOutputs): + out_file: File = shell.out( + help="output file", + callable=lambda orig_file: orig_file, + ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.out_file.fspath.exists() + shelly = Shelly(executable=cmd, orig_file=str(file)) + + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.out_file.fspath.exists() # the file is copied, and than it is changed in place - assert res.output.out_file.fspath.parent == shelly.output_dir - with open(res.output.out_file) as f: + assert outputs.out_file.fspath.parent.parent == tmp_path + with open(outputs.out_file) as f: assert "hi from pydra\n" == f.read() # the original file is unchanged with open(file) as f: assert "hello from pydra\n" == f.read() -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_copyfile_1a(plugin, results_function, tmp_path): """shelltask changes a file in place, adding copyfile=False to the File-input from input_spec @@ -1783,59 +1337,39 @@ def test_shell_cmd_inputspec_copyfile_1a(plugin, results_function, tmp_path): cmd = ["sed", "-is", "s/hello/hi/"] - my_input_spec = 
SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=File, - metadata={ - "position": 1, - "argstr": "", - "help_string": "orig file", - "mandatory": True, - "copyfile": "hardlink", - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{orig_file}", - "help_string": "output file", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + orig_file: File = shell.arg( + position=1, + argstr="", + help="orig file", + copy_mode="hardlink", + ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - orig_file=str(file), - cache_dir=tmp_path, - ) + class Outputs(ShellOutputs): + out_file: File = shell.out( + callable=lambda orig_file: orig_file, + help="output file", + ) + + shelly = Shelly(executable=cmd, orig_file=str(file)) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.out_file.fspath.exists() + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.out_file.fspath.exists() # the file is uses a soft link, but it creates and an extra copy before modifying - assert res.output.out_file.fspath.parent == shelly.output_dir + assert outputs.out_file.fspath.parent.parent == tmp_path - assert res.output.out_file.fspath.parent.joinpath( - res.output.out_file.fspath.name + "s" + assert outputs.out_file.fspath.parent.joinpath( + outputs.out_file.fspath.name + "s" ).exists() - with open(res.output.out_file) as f: + with open(outputs.out_file) as f: assert "hi from pydra\n" == f.read() # the file is uses a soft link, but it creates and an extra copy # it might depend on the OS - linked_file_copy = res.output.out_file.fspath.parent.joinpath( - res.output.out_file.fspath.name + "s" + linked_file_copy = outputs.out_file.fspath.parent.joinpath( + outputs.out_file.fspath.name + "s" ) if linked_file_copy.exists(): with open(linked_file_copy) as f: @@ -1846,180 +1380,104 @@ def test_shell_cmd_inputspec_copyfile_1a(plugin, results_function, tmp_path): assert "hello from pydra\n" == f.read() -@pytest.mark.xfail( - reason="not sure if we want to support input overwrite," - "if we allow for this orig_file is changing, so does checksum," - " and the results can't be found" -) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_copyfile_1b(plugin, results_function, tmp_path): - """shelltask changes a file in place, - copyfile is None for the file-input, so original filed is changed - """ - file = tmp_path / "file_pydra.txt" - with open(file, "w") as f: - f.write("hello from pydra\n") - - cmd = ["sed", "-is", "s/hello/hi/"] - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=File, - metadata={ - "position": 1, - "argstr": "", - "help_string": "orig file", - "mandatory": True, - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{orig_file}", - "help_string": "output file", - }, - ), - ), - ], - bases=(ShellSpec,), - ) - - shelly = ShellCommandTask( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - orig_file=str(file), - cache_dir=tmp_path, - ) - - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.out_file.fspath.exists() - # the file is not copied, it is changed in place - assert res.output.out_file == file - with open(res.output.out_file) as 
f: - assert "hi from pydra\n" == f.read() - - -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_state_1(plugin, results_function, tmp_path): """adding state to the input from input_spec""" cmd_exec = "echo" hello = ["HELLO", "hi"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "text", - "mandatory": True, - "argstr": "", - }, - ), - ) - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: str = shell.arg( + position=1, + help="text", + argstr="", + ) + + class Outputs(ShellOutputs): + pass # separate command into exec + args - shelly = ShellCommandTask( - name="shelly", - executable=cmd_exec, - input_spec=my_input_spec, - cache_dir=tmp_path, - ).split("text", text=hello) - assert shelly.inputs.executable == cmd_exec + shelly = Shelly().split("text", text=hello) + assert shelly.executable == cmd_exec # todo: this doesn't work when state # assert shelly.cmdline == "echo HELLO" - res = results_function(shelly, plugin) - assert res[0].output.stdout == "HELLO\n" - assert res[1].output.stdout == "hi\n" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout[0] == "HELLO\n" + assert outputs.stdout[1] == "hi\n" -def test_shell_cmd_inputspec_typeval_1(): +def test_shell_cmd_inputspec_typeval_1(tmp_path): """customized input_spec with a type that doesn't match the value - raise an exception """ cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=int, - metadata={"position": 1, "argstr": "", "help_string": "text"}, - ), - ) - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: int = shell.arg( + position=1, + argstr="", + help="text", + ) + + class Outputs(ShellOutputs): + pass with pytest.raises(TypeError): - ShellCommandTask(executable=cmd_exec, text="hello", input_spec=my_input_spec) + Shelly(text="hello") -def test_shell_cmd_inputspec_typeval_2(): +def test_shell_cmd_inputspec_typeval_2(tmp_path): """customized input_spec (shorter syntax) with a type that doesn't match the value - raise an exception """ cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[("text", int, {"position": 1, "argstr": "", "help_string": "text"})], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + + text: int = shell.arg(position=1, argstr="", help="text") + + class Outputs(ShellOutputs): + pass with pytest.raises(TypeError): - ShellCommandTask(executable=cmd_exec, text="hello", input_spec=my_input_spec) + Shelly(text="hello") -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_state_1a(plugin, results_function, tmp_path): """adding state to the input from input_spec using shorter syntax for input_spec (without default) """ cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - str, - {"position": 1, "help_string": "text", "mandatory": True, "argstr": ""}, - ) - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: str = shell.arg( + 
position=1, + help="text", + argstr="", + ) + + class Outputs(ShellOutputs): + pass # separate command into exec + args - shelly = ShellCommandTask( - name="shelly", - executable=cmd_exec, - input_spec=my_input_spec, - cache_dir=tmp_path, - ).split(text=["HELLO", "hi"]) - assert shelly.inputs.executable == cmd_exec + shelly = Shelly().split(text=["HELLO", "hi"]) + assert shelly.executable == cmd_exec - res = results_function(shelly, plugin) - assert res[0].output.stdout == "HELLO\n" - assert res[1].output.stdout == "hi\n" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout[0] == "HELLO\n" + assert outputs.stdout[1] == "hi\n" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_state_2(plugin, results_function, tmp_path): """ adding splitter to input that is used in the output_file_tamplate @@ -2027,38 +1485,28 @@ def test_shell_cmd_inputspec_state_2(plugin, results_function, tmp_path): cmd = "touch" args = ["newfile_1.txt", "newfile_2.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out1", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}", - "help_string": "output file", - }, - ), + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + + arg: str = shell.arg(argstr=None) + + class Outputs(ShellOutputs): + out1: File = shell.outarg( + path_template="{arg}", + help="output file", ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - cache_dir=tmp_path, - ).split(args=args) + shelly = Shelly(executable=cmd).split(arg=args) - res = results_function(shelly, plugin) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) for i in range(len(args)): - assert res[i].output.stdout == "" - assert res[i].output.out1.fspath.exists() - assert res[i].output.out1.fspath.parent == shelly.output_dir[i] + assert outputs.stdout[i] == "" + assert outputs.out1[i].fspath.exists() + assert outputs.out1[i].fspath.parent.parent == tmp_path -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_state_3(plugin, results_function, tmp_path): """adding state to the File-input from input_spec""" @@ -2071,41 +1519,28 @@ def test_shell_cmd_inputspec_state_3(plugin, results_function, tmp_path): cmd_exec = "cat" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "position": 1, - "help_string": "files", - "mandatory": True, - "argstr": "", - }, - ), - ) - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + file: File = shell.arg( + position=1, + help="files", + argstr="", + ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd_exec, - input_spec=my_input_spec, - cache_dir=tmp_path, - ).split(file=[file_1, file_2]) + class Outputs(ShellOutputs): + pass - assert shelly.inputs.executable == cmd_exec + shelly = Shelly().split(file=[file_1, file_2]) + + assert shelly.executable == cmd_exec # todo: this doesn't work when state # assert shelly.cmdline == "echo HELLO" - res = results_function(shelly, plugin) - assert res[0].output.stdout == "hello from pydra" - assert res[1].output.stdout == "have a nice one" + outputs = 
results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == ["hello from pydra", "have a nice one"] -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_copyfile_state_1(plugin, results_function, tmp_path): """adding state to the File-input from input_spec""" @@ -2120,51 +1555,34 @@ def test_shell_cmd_inputspec_copyfile_state_1(plugin, results_function, tmp_path files = [str(file1), str(file2)] cmd = ["sed", "-is", "s/hello/hi/"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=File, - metadata={ - "position": 1, - "argstr": "", - "help_string": "orig file", - "mandatory": True, - "copyfile": "copy", - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{orig_file}", - "help_string": "output file", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + orig_file: File = shell.arg( + position=1, + argstr="", + help="orig file", + copy_mode="copy", + ) - shelly = ShellCommandTask( - name="shelly", + class Outputs(ShellOutputs): + out_file: File = shell.out( + callable=lambda orig_file: orig_file, + help="output file", + ) + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, - cache_dir=tmp_path, ).split("orig_file", orig_file=files) txt_l = ["from pydra", "world"] - res_l = results_function(shelly, plugin) - for i, res in enumerate(res_l): - assert res.output.stdout == "" - assert res.output.out_file.fspath.exists() + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + for i in range(len(files)): + assert outputs.stdout[i] == "" + assert outputs.out_file[i].fspath.exists() # the file is copied, and than it is changed in place - assert res.output.out_file.fspath.parent == shelly.output_dir[i] - with open(res.output.out_file) as f: + assert outputs.out_file[i].fspath.parent.parent == tmp_path + with open(outputs.out_file[i]) as f: assert f"hi {txt_l[i]}\n" == f.read() # the original file is unchanged with open(files[i]) as f: @@ -2176,523 +1594,391 @@ def test_shell_cmd_inputspec_copyfile_state_1(plugin, results_function, tmp_path @pytest.mark.flaky(reruns=2) # when dask def test_wf_shell_cmd_2(plugin_dask_opt, tmp_path): - """a workflow with input with defined output_file_template (str) + """a workflow with input with defined path_template (str) that requires wf.lzin """ - wf = Workflow(name="wf", input_spec=["cmd", "args"]) - - wf.inputs.cmd = "touch" - wf.inputs.args = "newfile.txt" - wf.cache_dir = tmp_path - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out1", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}", - "help_string": "output file", - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "touch" + + arg: str = shell.arg() + + class Outputs(ShellOutputs): + out1: File = shell.outarg( + path_template="{arg}", + help="output file", ) - ], - bases=(ShellSpec,), - ) - wf.add( - ShellCommandTask( - name="shelly", - input_spec=my_input_spec, - executable=wf.lzin.cmd, - args=wf.lzin.args, + @workflow.define(outputs=["out_f", "stdout"]) + def Workflow(cmd, arg): + + shelly = workflow.add( + Shelly( + executable=cmd, + arg=arg, + ) ) - ) - wf.set_output([("out_f", wf.shelly.lzout.out1), ("out", wf.shelly.lzout.stdout)]) + return shelly.out1, shelly.stdout - with 
Submitter(plugin=plugin_dask_opt) as sub: - wf(submitter=sub) + wf = Workflow(cmd="touch", arg="newfile.txt") - res = wf.result() - assert res.output.out == "" - assert res.output.out_f.fspath.exists() - assert res.output.out_f.fspath.parent == wf.output_dir + with Submitter(plugin=plugin_dask_opt, cache_dir=tmp_path) as sub: + res = sub(wf) + + assert res.outputs.stdout == "" + assert res.outputs.out_f.fspath.exists() + assert res.outputs.out_f.fspath.parent.parent == tmp_path def test_wf_shell_cmd_2a(plugin, tmp_path): - """a workflow with input with defined output_file_template (tuple) + """a workflow with input with defined path_template (tuple) that requires wf.lzin """ - wf = Workflow(name="wf", input_spec=["cmd", "args"]) - - wf.inputs.cmd = "touch" - wf.inputs.args = "newfile.txt" - wf.cache_dir = tmp_path - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out1", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}", - "help_string": "output file", - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "shelly" + + arg: str = shell.arg() + + class Outputs(ShellOutputs): + out1: File = shell.outarg( + path_template="{arg}", + help="output file", ) - ], - bases=(ShellSpec,), - ) - wf.add( - ShellCommandTask( - name="shelly", - input_spec=my_input_spec, - executable=wf.lzin.cmd, - args=wf.lzin.args, + @workflow.define(outputs=["out_f", "out"]) + def Workflow(cmd, arg): + + shelly = workflow.add( + Shelly( + executable=cmd, + arg=arg, + ) ) - ) - wf.set_output([("out_f", wf.shelly.lzout.out1), ("out", wf.shelly.lzout.stdout)]) + return shelly.out1, shelly.stdout + + wf = Workflow(cmd="touch", arg="newfile.txt") with Submitter(plugin=plugin) as sub: - wf(submitter=sub) + res = sub(wf) - res = wf.result() - assert res.output.out == "" - assert res.output.out_f.fspath.exists() + assert res.outputs.out == "" + assert res.outputs.out_f.fspath.exists() def test_wf_shell_cmd_3(plugin, tmp_path): """a workflow with 2 tasks, - first one has input with output_file_template (str, uses wf.lzin), + first one has input with path_template (str, uses wf.lzin), that is passed to the second task """ - wf = Workflow(name="wf", input_spec=["cmd1", "cmd2", "args"]) - - wf.inputs.cmd1 = "touch" - wf.inputs.cmd2 = "cp" - wf.inputs.args = "newfile.txt" - wf.cache_dir = tmp_path - - my_input_spec1 = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}", - "help_string": "output file", - }, - ), + + @shell.define + class Shelly1(ShellDef["Shelly1.Outputs"]): + executable = "shelly" + + arg: str = shell.arg(argstr=None) + + class Outputs(ShellOutputs): + file: File = shell.outarg( + path_template="{arg}", + help="output file", ) - ], - bases=(ShellSpec,), - ) - my_input_spec2 = SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=File, - metadata={ - "position": 1, - "help_string": "output file", - "argstr": "", - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "position": 2, - "argstr": "", - "output_file_template": "{orig_file}_copy", - "help_string": "output file", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly2(ShellDef["Shelly2.Outputs"]): + + executable = "shelly2" - wf.add( - ShellCommandTask( - name="shelly1", - input_spec=my_input_spec1, - executable=wf.lzin.cmd1, - args=wf.lzin.args, + orig_file: File = shell.arg( + position=1, + help="output file", + argstr="", ) - ) - wf.add( - ShellCommandTask( - 
name="shelly2", - input_spec=my_input_spec2, - executable=wf.lzin.cmd2, - orig_file=wf.shelly1.lzout.file, + + class Outputs(ShellOutputs): + out_file: File = shell.outarg( + position=2, + argstr="", + path_template="{orig_file}_copy", + help="output file", + ) + + @workflow.define(outputs=["touch_file", "out1", "cp_file", "out2"]) + def Workflow(cmd1, cmd2, arg): + + shelly1 = workflow.add( + Shelly1( + executable=cmd1, + arg=arg, + ) + ) + shelly2 = workflow.add( + Shelly2( + executable=cmd2, + orig_file=shelly1.file, + ) ) - ) - wf.set_output( - [ - ("touch_file", wf.shelly1.lzout.file), - ("out1", wf.shelly1.lzout.stdout), - ("cp_file", wf.shelly2.lzout.out_file), - ("out2", wf.shelly2.lzout.stdout), - ] - ) + return shelly1.file, shelly1.stdout, shelly2.out_file, shelly2.stdout - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) + wf = Workflow(cmd1="touch", cmd2="cp", arg="newfile.txt") - res = wf.result() - assert res.output.out1 == "" - assert res.output.touch_file.fspath.exists() - assert res.output.touch_file.fspath.parent == wf.output_dir - assert res.output.out2 == "" - assert res.output.cp_file.fspath.exists() - assert res.output.cp_file.fspath.parent == wf.output_dir + with Submitter(plugin=plugin, cache_dir=tmp_path) as sub: + res = sub(wf) + + assert res.outputs.out1 == "" + assert res.outputs.touch_file.fspath.exists() + assert res.outputs.touch_file.fspath.parent.parent == tmp_path + assert res.outputs.out2 == "" + assert res.outputs.cp_file.fspath.exists() + assert res.outputs.cp_file.fspath.parent.parent == tmp_path def test_wf_shell_cmd_3a(plugin, tmp_path): """a workflow with 2 tasks, - first one has input with output_file_template (str, uses wf.lzin), + first one has input with path_template (str, uses wf.lzin), that is passed to the second task """ - wf = Workflow(name="wf", input_spec=["cmd1", "cmd2", "args"]) - - wf.inputs.cmd1 = "touch" - wf.inputs.cmd2 = "cp" - wf.inputs.args = "newfile.txt" - wf.cache_dir = tmp_path - - my_input_spec1 = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}", - "help_string": "output file", - }, - ), + + @shell.define + class Shelly1(ShellDef["Shelly1.Outputs"]): + executable = "shelly" + arg: str = shell.outarg(argstr=None) + + class Outputs(ShellOutputs): + + file: File = shell.outarg( + path_template="{arg}", + help="output file", ) - ], - bases=(ShellSpec,), - ) - my_input_spec2 = SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "output file", - "argstr": "", - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "position": 2, - "argstr": "", - "output_file_template": "{orig_file}_cp", - "help_string": "output file", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly2(ShellDef["Shelly2.Outputs"]): + executable = "shelly2" + orig_file: str = shell.arg( + position=1, + help="output file", + argstr="", + ) + + class Outputs(ShellOutputs): + out_file: File = shell.outarg( + position=2, + argstr="", + path_template="{orig_file}_cp", + help="output file", + ) + + @workflow.define(outputs=["touch_file", "out1", "cp_file", "out2"]) + def Workflow(cmd1, cmd2, arg): - wf.add( - ShellCommandTask( - name="shelly1", - input_spec=my_input_spec1, - executable=wf.lzin.cmd1, - args=wf.lzin.args, + shelly1 = workflow.add( + Shelly1( + executable=cmd1, + arg=arg, + ) ) - ) - wf.add( - ShellCommandTask( - name="shelly2", - input_spec=my_input_spec2, - 
executable=wf.lzin.cmd2, - orig_file=wf.shelly1.lzout.file, + shelly2 = workflow.add( + Shelly2( + executable=cmd2, + orig_file=shelly1.file, + ) ) - ) - wf.set_output( - [ - ("touch_file", wf.shelly1.lzout.file), - ("out1", wf.shelly1.lzout.stdout), - ("cp_file", wf.shelly2.lzout.out_file), - ("out2", wf.shelly2.lzout.stdout), - ] - ) + return shelly1.file, shelly1.stdout, shelly2.out_file, shelly2.stdout + + wf = Workflow(cmd1="touch", cmd2="cp", arg="newfile.txt") with Submitter(plugin=plugin) as sub: - wf(submitter=sub) + res = sub(wf) - res = wf.result() - assert res.output.out1 == "" - assert res.output.touch_file.fspath.exists() - assert res.output.out2 == "" - assert res.output.cp_file.fspath.exists() + assert res.outputs.out1 == "" + assert res.outputs.touch_file.fspath.exists() + assert res.outputs.out2 == "" + assert res.outputs.cp_file.fspath.exists() def test_wf_shell_cmd_state_1(plugin, tmp_path): """a workflow with 2 tasks and splitter on the wf level, - first one has input with output_file_template (str, uses wf.lzin), + first one has input with path_template (str, uses wf.lzin), that is passed to the second task """ - wf = Workflow( - name="wf", input_spec=["cmd1", "cmd2", "args"], cache_dir=tmp_path - ).split("args", args=["newfile_1.txt", "newfile_2.txt"]) - - wf.inputs.cmd1 = "touch" - wf.inputs.cmd2 = "cp" - - my_input_spec1 = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}", - "help_string": "output file", - }, - ), + + @shell.define + class Shelly1(ShellDef["Shelly1.Outputs"]): + executable = "shelly1" + + arg: str = shell.arg(argstr=None) + + class Outputs(ShellOutputs): + file: File = shell.outarg( + path_template="{arg}", + help="output file", ) - ], - bases=(ShellSpec,), - ) - my_input_spec2 = SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "output file", - "argstr": "", - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "position": 2, - "argstr": "", - "output_file_template": "{orig_file}_copy", - "help_string": "output file", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly2(ShellDef["Shelly2.Outputs"]): + executable = "shelly2" + orig_file: str = shell.arg( + position=1, + help="output file", + argstr="", + ) - wf.add( - ShellCommandTask( - name="shelly1", - input_spec=my_input_spec1, - executable=wf.lzin.cmd1, - args=wf.lzin.args, + class Outputs(ShellOutputs): + out_file: File = shell.outarg( + position=2, + argstr="", + path_template="{orig_file}_copy", + help="output file", + ) + + @workflow.define(outputs=["touch_file", "out1", "cp_file", "out2"]) + def Workflow(cmd1, cmd2, arg): + + shelly1 = workflow.add( + Shelly1( + executable=cmd1, + arg=arg, + ) ) - ) - wf.add( - ShellCommandTask( - name="shelly2", - input_spec=my_input_spec2, - executable=wf.lzin.cmd2, - orig_file=wf.shelly1.lzout.file, + shelly2 = workflow.add( + Shelly2( + executable=cmd2, + orig_file=shelly1.file, + ) ) - ) - wf.set_output( - [ - ("touch_file", wf.shelly1.lzout.file), - ("out1", wf.shelly1.lzout.stdout), - ("cp_file", wf.shelly2.lzout.out_file), - ("out2", wf.shelly2.lzout.stdout), - ] - ) + return shelly1.file, shelly1.stdout, shelly2.out_file, shelly2.stdout - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) + wf = Workflow(cmd1="touch", cmd2="cp").split(arg=["newfile_1.txt", "newfile_2.txt"]) - res_l = wf.result() - for i, res in enumerate(res_l): - assert res.output.out1 
== "" - assert res.output.touch_file.fspath.exists() - assert res.output.touch_file.fspath.parent == wf.output_dir[i] - assert res.output.out2 == "" - assert res.output.cp_file.fspath.exists() - assert res.output.cp_file.fspath.parent == wf.output_dir[i] + with Submitter(plugin=plugin, cache_dir=tmp_path) as sub: + res = sub(wf) + + for i in range(2): + assert res.outputs.out1[i] == "" + assert res.outputs.touch_file[i].fspath.exists() + assert res.outputs.touch_file[i].fspath.parent.parent == tmp_path + assert res.outputs.out2[i] == "" + assert res.outputs.cp_file[i].fspath.exists() + assert res.outputs.cp_file[i].fspath.parent.parent == tmp_path def test_wf_shell_cmd_ndst_1(plugin, tmp_path): """a workflow with 2 tasks and a splitter on the node level, - first one has input with output_file_template (str, uses wf.lzin), + first one has input with path_template (str, uses wf.lzin), that is passed to the second task """ - wf = Workflow(name="wf", input_spec=["cmd1", "cmd2", "args"]) - - wf.inputs.cmd1 = "touch" - wf.inputs.cmd2 = "cp" - wf.inputs.args = ["newfile_1.txt", "newfile_2.txt"] - wf.cache_dir = tmp_path - - my_input_spec1 = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}", - "help_string": "output file", - }, - ), + + @shell.define + class Shelly1(ShellDef["Shelly1.Outputs"]): + executable = "shelly" + + arg: str = shell.arg(argstr=None) + + class Outputs(ShellOutputs): + file: File = shell.outarg( + path_template="{arg}", + help="output file", ) - ], - bases=(ShellSpec,), - ) - my_input_spec2 = SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "output file", - "argstr": "", - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "position": 2, - "argstr": "", - "output_file_template": "{orig_file}_copy", - "help_string": "output file", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly2(ShellDef["Shelly2.Outputs"]): + executable = "shelly2" - wf.add( - ShellCommandTask( - name="shelly1", - input_spec=my_input_spec1, - executable=wf.lzin.cmd1, - ).split("args", args=wf.lzin.args) - ) - wf.add( - ShellCommandTask( - name="shelly2", - input_spec=my_input_spec2, - executable=wf.lzin.cmd2, - orig_file=wf.shelly1.lzout.file, + orig_file: str = shell.arg( + position=1, + help="output file", + argstr="", ) - ) - wf.set_output( - [ - ("touch_file", wf.shelly1.lzout.file), - ("out1", wf.shelly1.lzout.stdout), - ("cp_file", wf.shelly2.lzout.out_file), - ("out2", wf.shelly2.lzout.stdout), - ] + class Outputs(ShellOutputs): + out_file: File = shell.outarg( + position=2, + argstr="", + path_template="{orig_file}_copy", + help="output file", + ) + + @workflow.define(outputs=["touch_file", "out1", "cp_file", "out2"]) + def Workflow(cmd1, cmd2, args): + + shelly1 = workflow.add( + Shelly1( + executable=cmd1, + ).split("arg", arg=args) + ) + shelly2 = workflow.add( + Shelly2( + executable=cmd2, + orig_file=shelly1.file, + ) + ) + + return shelly1.file, shelly1.stdout, shelly2.out_file, shelly2.stdout + + wf = Workflow( + cmd1="touch", + cmd2="cp", + args=["newfile_1.txt", "newfile_2.txt"], ) - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) + with Submitter(plugin=plugin, cache_dir=tmp_path) as sub: + res = sub(wf) - res = wf.result() - assert res.output.out1 == ["", ""] - assert all([file.fspath.exists() for file in res.output.touch_file]) - assert res.output.out2 == ["", ""] - assert 
all([file.fspath.exists() for file in res.output.cp_file]) + assert res.outputs.out1 == ["", ""] + assert all([file.fspath.exists() for file in res.outputs.touch_file]) + assert res.outputs.out2 == ["", ""] + assert all([file.fspath.exists() for file in res.outputs.cp_file]) -# customised output spec +# customised output definition -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_1(plugin, results_function, tmp_path): """ customised output_spec, adding files to the output, providing specific pathname """ cmd = ["touch", "newfile_tmp.txt"] - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", File, "newfile_tmp.txt")], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path + Shelly = shell.define( + cmd, + outputs=[ + shell.out(name="newfile", type=File, callable=lambda: "newfile_tmp.txt") + ], ) + shelly = Shelly() - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.newfile.fspath.exists() + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.newfile.fspath.exists() -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_1a(plugin, results_function, tmp_path): """ customised output_spec, adding files to the output, providing specific pathname """ cmd = ["touch", "newfile_tmp.txt"] - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", attr.ib(type=File, default="newfile_tmp.txt"))], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path - ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.newfile.fspath.exists() + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + class Outputs(ShellOutputs): + newfile: File = shell.outarg(path_template="newfile_tmp.txt") + + shelly = Shelly() + + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.newfile.fspath.exists() def test_shell_cmd_outputspec_1b_exception(plugin, tmp_path): @@ -2700,14 +1986,16 @@ def test_shell_cmd_outputspec_1b_exception(plugin, tmp_path): customised output_spec, adding files to the output, providing specific pathname """ cmd = ["touch", "newfile_tmp.txt"] - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", File, "newfile_tmp_.txt")], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + class Outputs(ShellOutputs): + newfile: File = shell.out(callable=lambda: "newfile_tmp_.txt") + + shelly = Shelly() with pytest.raises(Exception) as exinfo: with Submitter(plugin=plugin) as sub: @@ -2715,25 +2003,27 @@ def test_shell_cmd_outputspec_1b_exception(plugin, tmp_path): assert "does not exist" in str(exinfo.value) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_2(plugin, 
results_function, tmp_path): """ customised output_spec, adding files to the output, using a wildcard in default """ cmd = ["touch", "newfile_tmp.txt"] - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", File, "newfile_*.txt")], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path - ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.newfile.fspath.exists() + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + class Outputs(ShellOutputs): + newfile: File = shell.outarg(path_template="newfile_*.txt") + + shelly = Shelly() + + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.newfile.fspath.exists() def test_shell_cmd_outputspec_2a_exception(plugin, tmp_path): @@ -2742,45 +2032,47 @@ def test_shell_cmd_outputspec_2a_exception(plugin, tmp_path): using a wildcard in default """ cmd = ["touch", "newfile_tmp.txt"] - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", File, "newfile_*K.txt")], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path - ) - with pytest.raises(Exception) as excinfo: - with Submitter(plugin=plugin) as sub: - shelly(submitter=sub) - assert "no file matches" in str(excinfo.value) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + class Outputs(ShellOutputs): + newfile: File = shell.out(default="newfile_*K.txt") + + shelly = Shelly() + + with pytest.raises(FileNotFoundError): + shelly(cache_dir=tmp_path, worker="debug") -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_3(plugin, results_function, tmp_path): """ customised output_spec, adding files to the output, using a wildcard in default, should collect two files """ cmd = ["touch", "newfile_tmp1.txt", "newfile_tmp2.txt"] - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", MultiOutputFile, "newfile_*.txt")], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path - ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + class Outputs(ShellOutputs): + newfile: MultiOutputFile = "newfile_*.txt" + + shelly = Shelly() + + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" # newfile is a list - assert len(res.output.newfile) == 2 - assert all([file.fspath.exists() for file in res.output.newfile]) + assert len(outputs.newfile) == 2 + assert all([file.fspath.exists() for file in outputs.newfile]) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_5(plugin, results_function, tmp_path): """ customised output_spec, adding files to the output, @@ -2793,33 +2085,25 @@ def gather_output(field, output_dir): if field.name == "newfile": return list(Path(output_dir).expanduser().glob("newfile*.txt")) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile", - attr.ib(type=MultiOutputFile, metadata={"callable": 
gather_output}), - ) - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd - res = results_function(shelly, plugin) - assert res.output.stdout == "" + class Outputs(ShellOutputs): + newfile: MultiOutputFile = shell.out(callable=gather_output) + + shelly = Shelly() + + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" # newfile is a list - assert len(res.output.newfile) == 2 - assert all([file.fspath.exists() for file in res.output.newfile]) - assert ( - shelly.output_names - == shelly.generated_output_names - == ["return_code", "stdout", "stderr", "newfile"] - ) + assert len(outputs.newfile) == 2 + assert all([file.fspath.exists() for file in outputs.newfile]) + assert get_output_names(shelly) == ["newfile", "return_code", "stderr", "stdout"] -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_5a(plugin, results_function, tmp_path): """ customised output_spec, adding files to the output, @@ -2832,28 +2116,25 @@ def gather_output(executable, output_dir): files = executable[1:] return [Path(output_dir) / file for file in files] - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile", - attr.ib(type=MultiOutputFile, metadata={"callable": gather_output}), - ) - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + class Outputs(ShellOutputs): + + newfile: MultiOutputFile = shell.out(callable=gather_output) + + shelly = Shelly() - res = results_function(shelly, plugin) - assert res.output.stdout == "" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" # newfile is a list - assert len(res.output.newfile) == 2 - assert all([file.fspath.exists() for file in res.output.newfile]) + assert len(outputs.newfile) == 2 + assert all([file.fspath.exists() for file in outputs.newfile]) -def test_shell_cmd_outputspec_5b_error(): +def test_shell_cmd_outputspec_5b_error(tmp_path): """ customised output_spec, adding files to the output, using a function to collect output, the function is saved in the field metadata @@ -2865,115 +2146,112 @@ def gather_output(executable, output_dir, ble): files = executable[1:] return [Path(output_dir) / file for file in files] - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", attr.ib(type=File, metadata={"callable": gather_output}))], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask(name="shelly", executable=cmd, output_spec=my_output_spec) - with pytest.raises(AttributeError, match="ble"): - shelly() + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + class Outputs(ShellOutputs): + newfile: File = shell.out(callable=gather_output) + shelly = Shelly() + with pytest.raises(KeyError, match="ble"): + shelly(cache_dir=tmp_path) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) + +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_5c(plugin, results_function, tmp_path): """ - Customised output spec defined as a class, + 
Customised output defined as a class, using a static function to collect output files. """ - @attr.s(kw_only=True) - class MyOutputSpec(ShellOutSpec): - @staticmethod - def gather_output(executable, output_dir): - files = executable[1:] - return [Path(output_dir) / file for file in files] + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): - newfile: MultiOutputFile = attr.ib(metadata={"callable": gather_output}) + executable = ["touch", "newfile_tmp1.txt", "newfile_tmp2.txt"] - shelly = ShellCommandTask( - name="shelly", - executable=["touch", "newfile_tmp1.txt", "newfile_tmp2.txt"], - output_spec=SpecInfo(name="Output", bases=(MyOutputSpec,)), - cache_dir=tmp_path, - ) + class Outputs(ShellOutputs): + + @staticmethod + def gather_output(executable, output_dir): + files = executable[1:] + return [Path(output_dir) / file for file in files] + + newfile: MultiOutputFile = shell.out(callable=gather_output) - res = results_function(shelly, plugin) - assert res.output.stdout == "" + shelly = Shelly() + + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" # newfile is a list - assert len(res.output.newfile) == 2 - assert all([file.exists() for file in res.output.newfile]) + assert len(outputs.newfile) == 2 + assert all([file.exists() for file in outputs.newfile]) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_6(plugin, results_function, tmp_path): """ - providing output name by providing output_file_template + providing output name by providing path_template (similar to the previous example, but not touching input_spec) """ cmd = "touch" - args = "newfile_tmp.txt" - - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "out1", - attr.ib( - type=File, - metadata={ - "output_file_template": "{args}", - "help_string": "output file", - }, - ), + arg = "newfile_tmp.txt" + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + arg: str = shell.arg() + + class Outputs(ShellOutputs): + + out1: File = shell.outarg( + path_template="{arg}", + help="output file", ) - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", + shelly = Shelly( executable=cmd, - args=args, - output_spec=my_output_spec, - cache_dir=tmp_path, + arg=arg, ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.out1.fspath.exists() + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.out1.fspath.exists() -def test_shell_cmd_outputspec_6a(): +def test_shell_cmd_outputspec_6a(tmp_path): """ - providing output name by providing output_file_template + providing output name by providing path_template (using shorter syntax) """ cmd = "touch" - args = "newfile_tmp.txt" - - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "out1", - File, - {"output_file_template": "{args}", "help_string": "output file"}, + arg = "newfile_tmp.txt" + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + arg: str = shell.arg(argstr=None) + + class Outputs(ShellOutputs): + + out1: File = shell.outarg( + path_template="{arg}", + help="output file", ) - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", executable=cmd, args=args, output_spec=my_output_spec - ) + shelly = Shelly(arg=arg) - res = shelly() - assert 
res.output.stdout == "" - assert res.output.out1.fspath.exists() + outputs = shelly(cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.out1.fspath.exists() -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_7(tmp_path, plugin, results_function): """ providing output with output_file_name and using MultiOutputFile as a type. @@ -2985,71 +2263,41 @@ def test_shell_cmd_outputspec_7(tmp_path, plugin, results_function): cmd = "bash" new_files_id = ["1", "2", "3"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "script", - attr.ib( - type=File, - metadata={ - "help_string": "script file", - "mandatory": True, - "position": 1, - "argstr": "", - }, - ), - ), - ( - "files_id", - attr.ib( - type=MultiInputObj, - metadata={ - "position": 2, - "argstr": "...", - "sep": " ", - "help_string": "list of name indices", - "mandatory": True, - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + script: File = shell.arg( + help="script file", + position=1, + argstr="", + ) + files_id: MultiInputObj = shell.arg( + position=2, + argstr="...", + sep=" ", + help="list of name indices", + ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "new_files", - attr.ib( - type=MultiOutputFile, - metadata={ - "output_file_template": "file{files_id}.txt", - "help_string": "output file", - }, - ), + class Outputs(ShellOutputs): + + new_files: MultiOutputFile = shell.outarg( + argstr=None, + path_template="file{files_id}.txt", + help="output file", ) - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, + shelly = Shelly( script=file, files_id=new_files_id, ) - res = results_function(shelly, "serial") - assert res.output.stdout == "" - for file in res.output.new_files: + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + for file in outputs.new_files: assert file.fspath.exists() -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_7a(tmp_path, plugin, results_function): """ providing output with output_file_name and using MultiOutputFile as a type. 
@@ -3061,60 +2309,30 @@ def test_shell_cmd_outputspec_7a(tmp_path, plugin, results_function): cmd = "bash" new_files_id = "1" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "script", - attr.ib( - type=File, - metadata={ - "help_string": "script file", - "mandatory": True, - "position": 1, - "argstr": "", - }, - ), - ), - ( - "files_id", - attr.ib( - type=MultiInputObj, - metadata={ - "position": 2, - "argstr": "...", - "sep": " ", - "help_string": "list of name indices", - "mandatory": True, - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + script: File = shell.arg( + help="script file", + position=1, + argstr="", + ) + files_id: MultiInputObj = shell.arg( + position=2, + argstr="...", + sep=" ", + help="list of name indices", + ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "new_files", - attr.ib( - type=MultiOutputFile, - metadata={ - "output_file_template": "file{files_id}.txt", - "help_string": "output file", - }, - ), + class Outputs(ShellOutputs): + + new_files: MultiOutputFile = shell.outarg( + argstr=None, + path_template="file{files_id}.txt", + help="output file", ) - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, + shelly = Shelly( script=file, files_id=new_files_id, ) @@ -3122,19 +2340,19 @@ def test_shell_cmd_outputspec_7a(tmp_path, plugin, results_function): # XXX: Figure out why this fails with "cf". Occurs in CI when using Ubuntu + Python >= 3.10 # (but not when using macOS + Python >= 3.10). Same error occurs in test_shell_cmd_inputspec_11 # see https://github.com/nipype/pydra/issues/671 - res = results_function(shelly, "serial") - assert res.output.stdout == "" - assert res.output.new_files.fspath.exists() + outputs = results_function(shelly, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.new_files.fspath.exists() -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_8a(tmp_path, plugin, results_function): """ customised output_spec, adding int and str to the output, requiring two callables with parameters stdout and stderr """ cmd = "echo" - args = ["newfile_1.txt", "newfile_2.txt"] + args = [["newfile_1.txt"], ["newfile_2.txt"]] def get_file_index(stdout): stdout = re.sub(r".*_", "", stdout) @@ -3145,79 +2363,50 @@ def get_file_index(stdout): def get_stderr(stderr): return f"stderr: {stderr}" - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "out1", - attr.ib( - type=File, - metadata={ - "output_file_template": "{args}", - "help_string": "output file", - }, - ), - ), - ( - "out_file_index", - attr.ib( - type=int, - metadata={"help_string": "output file", "callable": get_file_index}, - ), - ), - ( - "stderr_field", - attr.ib( - type=str, - metadata={ - "help_string": "The standard error output", - "callable": get_stderr, - }, - ), - ), - ], - bases=(ShellOutSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + class Outputs(ShellOutputs): + + out_file_index: int = shell.out( + help="output file", + callable=get_file_index, + ) + stderr_field: str = shell.out( + help="The standard error output", + callable=get_stderr, + ) - shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path - 
).split("args", args=args) + shelly = Shelly().split(additional_args=args) - results = results_function(shelly, plugin) - for index, res in enumerate(results): - assert res.output.out_file_index == index + 1 - assert res.output.stderr_field == f"stderr: {res.output.stderr}" + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + for index in range(2): + assert outputs.out_file_index[index] == index + 1 + assert outputs.stderr_field[index] == f"stderr: {outputs.stderr[index]}" -def test_shell_cmd_outputspec_8b_error(): +def test_shell_cmd_outputspec_8b_error(tmp_path): """ customised output_spec, adding Int to the output, requiring a function to collect output """ - cmd = "echo" - args = ["newfile_1.txt", "newfile_2.txt"] - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "out", - attr.ib( - type=int, metadata={"help_string": "output file", "value": "val"} - ), - ) - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec - ).split("args", args=args) - with pytest.raises(Exception) as e: - shelly() - assert "has to have a callable" in str(e.value) + with pytest.raises( + ValueError, + match="A shell output field must have either a callable or a path_template", + ): + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "echo" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) + class Outputs(ShellOutputs): + out: int = shell.out(help="output file") + + +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_8c(tmp_path, plugin, results_function): """ customised output_spec, adding Directory to the output named by args @@ -3229,41 +2418,32 @@ def get_lowest_directory(directory_path): cmd = "mkdir" args = [f"{tmp_path}/dir1", f"{tmp_path}/dir2"] - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "resultsDir", - attr.ib( - type=Directory, - metadata={ - "output_file_template": "{args}", - "help_string": "output file", - }, - ), + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + arg: str = shell.arg() + + class Outputs(ShellOutputs): + + resultsDir: Directory = shell.outarg( + path_template="{arg}", + help="output file", ) - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd, - output_spec=my_output_spec, - resultsDir="outdir", - cache_dir=tmp_path, - ).split("args", args=args) + shelly = Shelly(resultsDir="outdir").split(arg=args) - results_function(shelly, plugin) + results_function(shelly, plugin=plugin, cache_dir=tmp_path) for index, arg_dir in enumerate(args): assert Path(Path(tmp_path) / Path(arg_dir)).exists() assert get_lowest_directory(arg_dir) == f"/dir{index+1}" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_8d(tmp_path, plugin, results_function): """ - customised output_spec, adding Directory to the output named by input spec + customised output_spec, adding Directory to the output named by input definition """ # For /tmp/some_dict/test this function returns "/test" @@ -3272,99 +2452,61 @@ def get_lowest_directory(directory_path): cmd = "mkdir" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "resultsDir", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "new directory", - "argstr": "", - }, - ), - ) - ], - 
bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + resultsDir: str = shell.arg( + position=1, + help="new directory", + argstr="", + ) + + class Outputs(ShellOutputs): - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "resultsDir", - attr.ib( - type=Directory, - metadata={ - "output_file_template": "{resultsDir}", - "help_string": "output file", - }, - ), + resultsDir: Directory = shell.outarg( + path_template="{resultsDir}", + help="output file", ) - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name=cmd, - executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, - cache_dir=tmp_path, - resultsDir="test", # Path(tmp_path) / "test" TODO: Not working without absolute path support - ) - assert ( - shelly.output_names - == shelly.generated_output_names - == ["return_code", "stdout", "stderr", "resultsDir"] - ) - res = results_function(shelly, plugin) - print("Cache_dirr:", shelly.cache_dir) - assert (shelly.output_dir / Path("test")).exists() - assert get_lowest_directory(res.output.resultsDir) == get_lowest_directory( - shelly.output_dir / Path("test") + shelly = Shelly(resultsDir="test") + assert get_output_names(shelly) == ["resultsDir", "return_code", "stderr", "stdout"] + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + output_dir = next(tmp_path.iterdir()) + assert (output_dir / Path("test")).exists() + assert get_lowest_directory(outputs.resultsDir) == get_lowest_directory( + output_dir / Path("test") ) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_state_outputspec_1(plugin, results_function, tmp_path): """ - providing output name by providing output_file_template + providing output name by providing path_template splitter for a field that is used in the template """ cmd = "touch" args = ["newfile_1.txt", "newfile_2.txt"] - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "out1", - attr.ib( - type=File, - metadata={ - "output_file_template": "{args}", - "help_string": "output file", - }, - ), + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + arg: str = shell.arg() + + class Outputs(ShellOutputs): + + out1: File = shell.outarg( + path_template="{arg}", + help="output file", ) - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd, - output_spec=my_output_spec, - cache_dir=tmp_path, - ).split("args", args=args) + shelly = Shelly(executable=cmd).split(arg=args) - res = results_function(shelly, plugin) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) for i in range(len(args)): - assert res[i].output.stdout == "" - assert res[i].output.out1.fspath.exists() + assert outputs.stdout[i] == "" + assert outputs.out1[i].fspath.exists() # customised output_spec for tasks in workflows @@ -3377,728 +2519,467 @@ def test_shell_cmd_outputspec_wf_1(plugin, tmp_path): """ cmd = ["touch", "newfile_tmp.txt"] - wf = Workflow(name="wf", input_spec=["cmd"]) - wf.inputs.cmd = cmd - wf.cache_dir = tmp_path - - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", File, "newfile_tmp.txt")], - bases=(ShellOutSpec,), - ) - wf.add( - ShellCommandTask( - name="shelly", executable=wf.lzin.cmd, output_spec=my_output_spec - ) - ) - wf.set_output( - [("stdout", wf.shelly.lzout.stdout), ("newfile", wf.shelly.lzout.newfile)] - ) - with 
Submitter(plugin=plugin) as sub: - wf(submitter=sub) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = "shelly" + + class Outputs(ShellOutputs): + newfile: File = shell.outarg(path_template="newfile_tmp.txt") + + @workflow.define(outputs=["stdout", "newfile"]) + def Workflow(cmd): + shelly = workflow.add(Shelly(executable=cmd)) + return shelly.stdout, shelly.newfile + + wf = Workflow(cmd=cmd) - res = wf.result() - assert res.output.stdout == "" - assert res.output.newfile.fspath.exists() + with Submitter(plugin=plugin, cache_dir=tmp_path) as sub: + res = sub(wf) + + assert res.outputs.stdout == "" + assert res.outputs.newfile.fspath.exists() # checking if the file was copied to the wf dir - assert res.output.newfile.fspath.parent == wf.output_dir + assert res.outputs.newfile.fspath.parent.parent == tmp_path -def test_shell_cmd_inputspec_outputspec_1(): +def test_shell_cmd_inputspec_outputspec_1(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in templates """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, - ), - ( - "file2", - str, - {"help_string": "2nd creadted file", "argstr": "", "position": 2}, - ), - ], - bases=(ShellSpec,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - {"output_file_template": "{file1}", "help_string": "newfile 1"}, - ), - ( - "newfile2", - File, - {"output_file_template": "{file2}", "help_string": "newfile 2"}, - ), - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + file1: File = shell.arg(help="1st creadted file", argstr="", position=1) + file2: File = shell.arg(help="2nd creadted file", argstr="", position=2) + + class Outputs(ShellOutputs): + newfile1: File = shell.outarg(path_template="{file1}", help="newfile 1") + newfile2: File = shell.outarg(path_template="{file2}", help="newfile 2") + + executable = cmd + + shelly = Shelly( + file1=File.mock("new_file_1.txt"), file2=File.mock("new_file_2.txt") ) - shelly.inputs.file1 = "new_file_1.txt" - shelly.inputs.file2 = "new_file_2.txt" - res = shelly() - assert res.output.stdout == "" - assert res.output.newfile1.fspath.exists() - assert res.output.newfile2.fspath.exists() + outputs = shelly(cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.newfile1.fspath.exists() + assert outputs.newfile2.fspath.exists() -def test_shell_cmd_inputspec_outputspec_1a(): +def test_shell_cmd_inputspec_outputspec_1a(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in templates, file2 is used in a template for newfile2, but it is not provided, so newfile2 is set to NOTHING """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, - ), - ( - "file2", - str, - {"help_string": "2nd creadted file", "argstr": "", "position": 2}, - ), - ], - bases=(ShellSpec,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - {"output_file_template": "{file1}", "help_string": "newfile 1"}, - ), - ( - "newfile2", - File, - {"output_file_template": "{file2}", "help_string": "newfile 2"}, - ), - ], - bases=(ShellOutSpec,), - ) - shelly = 
ShellCommandTask( - name="shelly", + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + file2: str | None = shell.arg( + default=None, help="2nd creadted file", argstr="", position=2 + ) + + class Outputs(ShellOutputs): + + newfile1: File = shell.out(callable=lambda file1: file1, help="newfile 1") + newfile2: File | None = shell.out( + callable=lambda file2: file2, help="newfile 2" + ) + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, ) - shelly.inputs.file1 = "new_file_1.txt" + shelly.file1 = File.mock("new_file_1.txt") - res = shelly() - assert res.output.stdout == "" - assert res.output.newfile1.fspath.exists() + outputs = shelly(cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.newfile1.fspath.exists() # newfile2 is not created, since file2 is not provided - assert res.output.newfile2 is attr.NOTHING + assert outputs.newfile2 is None -def test_shell_cmd_inputspec_outputspec_2(): +def test_shell_cmd_inputspec_outputspec_2(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires filed """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, - ), - ( - "file2", - str, - {"help_string": "2nd creadted file", "argstr": "", "position": 2}, - ), - ], - bases=(ShellSpec,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - { - "output_file_template": "{file1}", - "help_string": "newfile 1", - "requires": ["file1"], - }, - ), - ( - "newfile2", - File, - { - "output_file_template": "{file2}", - "help_string": "newfile 1", - "requires": ["file1", "file2"], - }, - ), - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, - ) - shelly.inputs.file1 = "new_file_1.txt" - shelly.inputs.file2 = "new_file_2.txt" - # all fields from output_spec should be in output_names and generated_output_names - assert ( - shelly.output_names - == shelly.generated_output_names - == ["return_code", "stdout", "stderr", "newfile1", "newfile2"] - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + file2: str = shell.arg(help="2nd creadted file", argstr="", position=2) + + class Outputs(ShellOutputs): + + newfile1: File | None = shell.out( + callable=lambda file1: file1, + help="newfile 1", + requires=["file1"], + ) + newfile2: File | None = shell.out( + callable=lambda file2: file2, + help="newfile 1", + requires=["file1", "file2"], + ) + + shelly = Shelly(file1="new_file_1.txt", file2="new_file_2.txt") + assert get_output_names(shelly) == [ + "newfile1", + "newfile2", + "return_code", + "stderr", + "stdout", + ] - res = shelly() - assert res.output.stdout == "" - assert res.output.newfile1.fspath.exists() - assert res.output.newfile2.fspath.exists() + outputs = shelly(cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.newfile1.fspath.exists() + assert outputs.newfile2.fspath.exists() -def test_shell_cmd_inputspec_outputspec_2a(): +def test_shell_cmd_inputspec_outputspec_2a(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires filed """ cmd = ["touch", "newfile_tmp.txt"] - 
my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, - ), - ( - "file2", - str, - {"help_string": "2nd creadted file", "argstr": "", "position": 2}, - ), - ], - bases=(ShellSpec,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - { - "output_file_template": "{file1}", - "help_string": "newfile 1", - "requires": ["file1"], - }, - ), - ( - "newfile2", - File, - { - "output_file_template": "{file2}", - "help_string": "newfile 1", - "requires": ["file1", "file2"], - }, - ), - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + file2: str | None = shell.arg( + default=None, help="2nd creadted file", argstr="", position=2 + ) + + class Outputs(ShellOutputs): + + newfile1: File | None = shell.out( + callable=lambda file1: file1, + help="newfile 1", + requires=["file1"], + ) + newfile2: File | None = shell.out( + callable=lambda file2: file2, + help="newfile 1", + requires=["file1", "file2"], + ) + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, ) - shelly.inputs.file1 = "new_file_1.txt" - # generated_output_names should know that newfile2 will not be generated - assert shelly.output_names == [ - "return_code", - "stdout", - "stderr", + shelly.file1 = File.mock("new_file_1.txt") + assert get_output_names(shelly) == [ "newfile1", "newfile2", - ] - assert shelly.generated_output_names == [ "return_code", - "stdout", "stderr", - "newfile1", + "stdout", ] - res = shelly() - assert res.output.stdout == "" - assert res.output.newfile1.fspath.exists() - assert res.output.newfile2 is attr.NOTHING + outputs = shelly(cache_dir=tmp_path) + + assert outputs.stdout == "" + assert outputs.newfile1.fspath.exists() + assert outputs.newfile2 is None -def test_shell_cmd_inputspec_outputspec_3(): +def test_shell_cmd_inputspec_outputspec_3(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires filed adding one additional input that is not in the template, but in the requires field, """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, - ), - ( - "file2", - str, - {"help_string": "2nd creadted file", "argstr": "", "position": 2}, - ), - ("additional_inp", int, {"help_string": "additional inp"}), - ], - bases=(ShellSpec,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - {"output_file_template": "{file1}", "help_string": "newfile 1"}, - ), - ( - "newfile2", - File, - { - "output_file_template": "{file2}", - "help_string": "newfile 1", - "requires": ["file1", "additional_inp"], - }, - ), - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, - ) - shelly.inputs.file1 = "new_file_1.txt" - shelly.inputs.file2 = "new_file_2.txt" - shelly.inputs.additional_inp = 2 + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + file2: str = shell.arg(help="2nd creadted file", argstr="", position=2) + additional_inp: int = shell.arg(help="additional inp") + + class 
Outputs(ShellOutputs): + + newfile1: File = shell.out(callable=lambda file1: file1, help="newfile 1") + newfile2: File | None = shell.out( + callable=lambda file2: file2, + help="newfile 1", + requires=["file1", "additional_inp"], + ) + + shelly = Shelly(executable=cmd) + shelly.file1 = "new_file_1.txt" + shelly.file2 = "new_file_2.txt" + shelly.additional_inp = 2 - res = shelly() - assert res.output.stdout == "" - assert res.output.newfile1.fspath.exists() - assert res.output.newfile2.fspath.exists() + outputs = shelly(cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.newfile1.fspath.exists() + assert outputs.newfile2.fspath.exists() -def test_shell_cmd_inputspec_outputspec_3a(): +def test_shell_cmd_inputspec_outputspec_3a(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires filed adding one additional input that is not in the template, but in the requires field, the additional input not provided, so the output is NOTHING """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, - ), - ( - "file2", - str, - {"help_string": "2nd creadted file", "argstr": "", "position": 2}, - ), - ("additional_inp", str, {"help_string": "additional inp"}), - ], - bases=(ShellSpec,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - {"output_file_template": "{file1}", "help_string": "newfile 1"}, - ), - ( - "newfile2", - File, - { - "output_file_template": "{file2}", - "help_string": "newfile 1", - "requires": ["file1", "additional_inp"], - }, - ), - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + file2: str | None = shell.arg(help="2nd creadted file", argstr="", position=2) + additional_inp: str | None = shell.arg(default=None, help="additional inp") + + class Outputs(ShellOutputs): + + newfile1: File = shell.out(callable=lambda file1: file1, help="newfile 1") + newfile2: File | None = shell.out( + callable=lambda file2: file2, + help="newfile 1", + requires=["file1", "additional_inp"], + ) + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, ) - shelly.inputs.file1 = "new_file_1.txt" - shelly.inputs.file2 = "new_file_2.txt" - # generated_output_names should know that newfile2 will not be generated - assert shelly.output_names == [ - "return_code", - "stdout", - "stderr", + shelly.file1 = "new_file_1.txt" + shelly.file2 = "new_file_2.txt" + assert get_output_names(shelly) == [ "newfile1", "newfile2", - ] - assert shelly.generated_output_names == [ "return_code", - "stdout", "stderr", - "newfile1", + "stdout", ] - res = shelly() - assert res.output.stdout == "" - assert res.output.newfile1.fspath.exists() + shelly.file2 = None + outputs = shelly(cache_dir=tmp_path) + + assert outputs.stdout == "" + assert outputs.newfile1.fspath.exists() # additional input not provided so no newfile2 set (even if the file was created) - assert res.output.newfile2 is attr.NOTHING + assert outputs.newfile2 is None -def test_shell_cmd_inputspec_outputspec_4(): +def test_shell_cmd_inputspec_outputspec_4(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires filed adding one additional input to the requires together with a 
list of the allowed values, """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, - ), - ("additional_inp", int, {"help_string": "additional inp"}), - ], - bases=(ShellSpec,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - { - "output_file_template": "{file1}", - "help_string": "newfile 1", - "requires": ["file1", ("additional_inp", [2, 3])], - }, + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + additional_inp: int | None = shell.arg(help="additional inp", default=None) + + class Outputs(ShellOutputs): + + newfile1: File | None = shell.out( + callable=lambda file1: file1, + help="newfile 1", + requires=["file1", ("additional_inp", [2, 3])], ) - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, - ) - shelly.inputs.file1 = "new_file_1.txt" - shelly.inputs.additional_inp = 2 - # generated_output_names should be the same as output_names - assert ( - shelly.output_names - == shelly.generated_output_names - == ["return_code", "stdout", "stderr", "newfile1"] ) + shelly.file1 = File.mock("new_file_1.txt") + shelly.additional_inp = 2 + + outputs = shelly(cache_dir=tmp_path) + assert get_output_names(shelly) == [ + "newfile1", + "return_code", + "stderr", + "stdout", + ] - res = shelly() - assert res.output.stdout == "" - assert res.output.newfile1.fspath.exists() + assert outputs.stdout == "" + assert outputs.newfile1.fspath.exists() -def test_shell_cmd_inputspec_outputspec_4a(): +def test_shell_cmd_inputspec_outputspec_4a(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires filed adding one additional input to the requires together with a list of the allowed values, - the input is set to a value that is not in the list, so output is NOTHING + the input is set to a value that is not in the list, so output is None """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, - ), - ("additional_inp", int, {"help_string": "additional inp"}), - ], - bases=(ShellSpec,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - { - "output_file_template": "{file1}", - "help_string": "newfile 1", - "requires": ["file1", ("additional_inp", [2, 3])], - }, + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + additional_inp: int | None = shell.arg(help="additional inp", default=None) + + class Outputs(ShellOutputs): + + newfile1: File | None = shell.out( + callable=lambda file1: file1, + help="newfile 1", + requires=("file1", ("additional_inp", [2, 3])), ) - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, - ) - shelly.inputs.file1 = "new_file_1.txt" + + shelly = Shelly(executable=cmd) + shelly.file1 = File.mock("new_file_1.txt") # the value is not in the list from requires - shelly.inputs.additional_inp = 1 + shelly.additional_inp = 1 - res = shelly() - assert res.output.stdout == "" - assert 
res.output.newfile1 is attr.NOTHING + outputs = shelly(cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.newfile1 is None -def test_shell_cmd_inputspec_outputspec_5(): +def test_shell_cmd_inputspec_outputspec_5(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires requires is a list of list so it is treated as OR list (i.e. el[0] OR el[1] OR...) the firs element of the requires list has all the fields set """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, - ), - ("additional_inp_A", int, {"help_string": "additional inp A"}), - ("additional_inp_B", str, {"help_string": "additional inp B"}), - ], - bases=(ShellSpec,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - { - "output_file_template": "{file1}", - "help_string": "newfile 1", - # requires is a list of list so it's treated as el[0] OR el[1] OR... - "requires": [ - ["file1", "additional_inp_A"], - ["file1", "additional_inp_B"], - ], - }, + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + additional_inp_A: int | None = shell.arg(help="additional inp A", default=None) + additional_inp_B: str | None = shell.arg(help="additional inp B", default=None) + + class Outputs(ShellOutputs): + + newfile1: File | None = shell.out( + callable=lambda file1: file1, + help="newfile 1", + # requires is a list of list so it's treated as el[0] OR el[1] OR... + requires=[ + ["file1", "additional_inp_A"], + ["file1", "additional_inp_B"], + ], ) - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, ) - shelly.inputs.file1 = "new_file_1.txt" - shelly.inputs.additional_inp_A = 2 + shelly.file1 = File.mock("new_file_1.txt") + shelly.additional_inp_A = 2 - res = shelly() - assert res.output.stdout == "" - assert res.output.newfile1.fspath.exists() + outputs = shelly(cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.newfile1.fspath.exists() -def test_shell_cmd_inputspec_outputspec_5a(): +def test_shell_cmd_inputspec_outputspec_5a(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires requires is a list of list so it is treated as OR list (i.e. el[0] OR el[1] OR...) the second element of the requires list (i.e. additional_inp_B) has all the fields set """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, - ), - ("additional_inp_A", str, {"help_string": "additional inp A"}), - ("additional_inp_B", int, {"help_string": "additional inp B"}), - ], - bases=(ShellSpec,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - { - "output_file_template": "{file1}", - "help_string": "newfile 1", - # requires is a list of list so it's treated as el[0] OR el[1] OR... 
- "requires": [ - ["file1", "additional_inp_A"], - ["file1", "additional_inp_B"], - ], - }, + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + additional_inp_A: str | None = shell.arg(help="additional inp A", default=None) + additional_inp_B: int | None = shell.arg(help="additional inp B", default=None) + + class Outputs(ShellOutputs): + + newfile1: File | None = shell.out( + callable=lambda file1: file1, + help="newfile 1", + # requires is a list of list so it's treated as el[0] OR el[1] OR... + requires=[ + ["file1", "additional_inp_A"], + ["file1", "additional_inp_B"], + ], ) - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, ) - shelly.inputs.file1 = "new_file_1.txt" - shelly.inputs.additional_inp_B = 2 + shelly.file1 = File.mock("new_file_1.txt") + shelly.additional_inp_B = 2 - res = shelly() - assert res.output.stdout == "" - assert res.output.newfile1.fspath.exists() + outputs = shelly(cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.newfile1.fspath.exists() -def test_shell_cmd_inputspec_outputspec_5b(): +def test_shell_cmd_inputspec_outputspec_5b(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires requires is a list of list so it is treated as OR list (i.e. el[0] OR el[1] OR...) neither of the list from requirements has all the fields set, so the output is NOTHING """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, - ), - ("additional_inp_A", str, {"help_string": "additional inp A"}), - ("additional_inp_B", str, {"help_string": "additional inp B"}), - ], - bases=(ShellSpec,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - { - "output_file_template": "{file1}", - "help_string": "newfile 1", - # requires is a list of list so it's treated as el[0] OR el[1] OR... - "requires": [ - ["file1", "additional_inp_A"], - ["file1", "additional_inp_B"], - ], - }, + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + additional_inp_A: str | None = shell.arg(help="additional inp A", default=None) + additional_inp_B: str | None = shell.arg(help="additional inp B", default=None) + + class Outputs(ShellOutputs): + + newfile1: File | None = shell.out( + callable=lambda file1: file1, + help="newfile 1", + # requires is a list of list so it's treated as el[0] OR el[1] OR... 
+ requires=[ + ["file1", "additional_inp_A"], + ["file1", "additional_inp_B"], + ], ) - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, - ) - shelly.inputs.file1 = "new_file_1.txt" - res = shelly() - assert res.output.stdout == "" - # neither additional_inp_A nor additional_inp_B is set, so newfile1 is NOTHING - assert res.output.newfile1 is attr.NOTHING + shelly = Shelly(executable=cmd) + shelly.file1 = "new_file_1.txt" + + outputs = shelly(cache_dir=tmp_path) + assert outputs.stdout == "" + # neither additional_inp_A nor additional_inp_B is set, so newfile1 is None + assert outputs.newfile1 is None -def test_shell_cmd_inputspec_outputspec_6_except(): +@pytest.mark.xfail( + reason="I'm not sure why this requirements specification should fail" +) +def test_shell_cmd_inputspec_outputspec_6_except(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires requires has invalid syntax - exception is raised """ - cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, - ), - ("additional_inp_A", str, {"help_string": "additional inp A"}), - ], - bases=(ShellSpec,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - { - "output_file_template": "{file1}", - "help_string": "newfile 1", - # requires has invalid syntax - "requires": [["file1", "additional_inp_A"], "file1"], - }, + with pytest.raises(Exception, match="requires field can be"): + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "touch" + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + additional_inp_A: str | None = shell.arg( + default=None, help="additional inp A" ) - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, - ) - shelly.inputs.file1 = "new_file_1.txt" - with pytest.raises(Exception, match="requires field can be"): - shelly() + class Outputs(ShellOutputs): + newfile1: File | None = shell.out( + callable=lambda file1: file1, + help="newfile 1", + # requires has invalid syntax + requires=[["file1", "additional_inp_A"], "file1"], + ) def no_fsl(): @@ -4107,780 +2988,436 @@ def no_fsl(): @pytest.mark.skipif(no_fsl(), reason="fsl is not installed") -def test_fsl(data_tests_dir): +def test_fsl(data_tests_dir, tmp_path): """mandatory field added to fields, value provided""" - _xor_inputs = [ - "functional", - "reduce_bias", - "robust", - "padding", - "remove_eyes", - "surfaces", - "t2_guided", - ] - def change_name(file): name, ext = os.path.splitext(file) return f"{name}_brain.{ext}" - bet_input_spec = SpecInfo( - name="Input", - # TODO: change the position?? 
- fields=[ - ( - "in_file", - attr.ib( - type=File, - metadata={ - "help_string": "input file to skull strip", - "position": 1, - "mandatory": True, - "argstr": "", - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "help_string": "name of output skull stripped image", - "position": 2, - "argstr": "", - "output_file_template": "{in_file}_brain", - }, - ), - ), - ( - "outline", - attr.ib( - type=bool, - metadata={ - "help_string": "create surface outline image", - "argstr": "-o", - }, - ), - ), - ( - "mask", - attr.ib( - type=bool, - metadata={ - "help_string": "create binary mask image", - "argstr": "-m", - }, - ), - ), - ( - "skull", - attr.ib( - type=bool, - metadata={"help_string": "create skull image", "argstr": "-s"}, - ), - ), - ( - "no_output", - attr.ib( - type=bool, - metadata={ - "help_string": "Don't generate segmented output", - "argstr": "-n", - }, - ), - ), - ( - "frac", - attr.ib( - type=float, - metadata={ - "help_string": "fractional intensity threshold", - "argstr": "-f", - }, - ), - ), - ( - "vertical_gradient", - attr.ib( - type=float, - metadata={ - "help_string": "vertical gradient in fractional intensity threshold (-1, 1)", - "argstr": "-g", - "allowed_values": {"min_val": -1, "max_val": 1}, - }, - ), - ), - ( - "radius", - attr.ib( - type=int, metadata={"argstr": "-r", "help_string": "head radius"} - ), - ), - ( - "center", - attr.ib( - type=ty.List[int], - metadata={ - "help_string": "center of gravity in voxels", - "argstr": "-c", - "allowed_values": {"min_value": 0, "max_value": 3}, - }, - ), - ), - ( - "threshold", - attr.ib( - type=bool, - metadata={ - "argstr": "-t", - "help_string": "apply thresholding to segmented brain image and mask", - }, - ), - ), - ( - "mesh", - attr.ib( - type=bool, - metadata={ - "argstr": "-e", - "help_string": "generate a vtk mesh brain surface", - }, - ), - ), - ( - "robust", - attr.ib( - type=bool, - metadata={ - "help_string": "robust brain centre estimation (iterates BET several times)", - "argstr": "-R", - "xor": _xor_inputs, - }, - ), - ), - ( - "padding", - attr.ib( - type=bool, - metadata={ - "help_string": "improve BET if FOV is very small in Z (by temporarily padding end slices", - "argstr": "-Z", - "xor": _xor_inputs, - }, - ), - ), - ( - "remove_eyes", - attr.ib( - type=bool, - metadata={ - "help_string": "eye & optic nerve cleanup (can be useful in SIENA)", - "argstr": "-S", - "xor": _xor_inputs, - }, - ), - ), - ( - "surfaces", - attr.ib( - type=bool, - metadata={ - "help_string": "run bet2 and then betsurf to get additional skull and scalp surfaces (includes registrations)", - "argstr": "-A", - "xor": _xor_inputs, - }, - ), - ), - ( - "t2_guided", - attr.ib( - type=ty.Union[File, str], - metadata={ - "help_string": "as with creating surfaces, when also feeding in non-brain-extracted T2 (includes registrations)", - "argstr": "-A2", - "xor": _xor_inputs, - }, - ), - ), - ( - "functional", - attr.ib( - type=bool, - metadata={ - "argstr": "-F", - "xor": _xor_inputs, - "help_string": "apply to 4D fMRI data", - }, - ), - ), - ( - "reduce_bias", - attr.ib( - type=bool, - metadata={ - "argstr": "-B", - "xor": _xor_inputs, - "help_string": "bias field and neck cleanup", - }, - ), - ), - # ("number_classes", int, attr.ib(metadata={"help_string": 'number of tissue-type classes', "argstr": '-n', - # "allowed_values": {"min_val": 1, "max_val": 10}})), - # ("output_biasfield", bool, - # attr.ib(metadata={"help_string": 'output estimated bias field', "argstr": '-b'})), - # ("output_biascorrected", bool, - # 
attr.ib(metadata={"help_string": 'output restored image (bias-corrected image)', "argstr": '-B'})), - ], - bases=(ShellSpec,), + @shell.define( + xor=[ + "functional", + "reduce_bias", + "robust", + "padding", + "remove_eyes", + "surfaces", + "t2_guided", + None, + ] ) + class Bet(ShellDef["Bet.Outputs"]): + executable = "bet" + in_file: File = shell.arg( + help="input file to skull strip", + position=1, + argstr="", + ) + + outline: bool = shell.arg( + default=False, + help="create surface outline image", + argstr="-o", + ) + mask: bool = shell.arg( + default=False, + help="create binary mask image", + argstr="-m", + ) + skull: bool = shell.arg( + default=False, + help="create skull image", + argstr="-s", + ) + no_output: bool = shell.arg( + default=False, + help="Don't generate segmented output", + argstr="-n", + ) + frac: float | None = shell.arg( + default=None, + help="fractional intensity threshold", + argstr="-f", + ) + vertical_gradient: float | None = shell.arg( + default=None, + help="vertical gradient in fractional intensity threshold (-1, 1)", + argstr="-g", + allowed_values={"min_val": -1, "max_val": 1}, + ) + radius: int | None = shell.arg(default=None, argstr="-r", help="head radius") + center: ty.List[int] | None = shell.arg( + default=None, + help="center of gravity in voxels", + argstr="-c", + allowed_values={"min_value": 0, "max_value": 3}, + ) + threshold: bool = shell.arg( + default=False, + argstr="-t", + help="apply thresholding to segmented brain image and mask", + ) + mesh: bool = shell.arg( + default=False, + argstr="-e", + help="generate a vtk mesh brain surface", + ) + robust: bool = shell.arg( + default=False, + help="robust brain centre estimation (iterates BET several times)", + argstr="-R", + ) + padding: bool = shell.arg( + default=False, + help="improve BET if FOV is very small in Z (by temporarily padding end slices", + argstr="-Z", + ) + remove_eyes: bool = shell.arg( + default=False, + help="eye & optic nerve cleanup (can be useful in SIENA)", + argstr="-S", + ) + surfaces: bool = shell.arg( + default=False, + help="run bet2 and then betsurf to get additional skull and scalp surfaces (includes registrations)", + argstr="-A", + ) + t2_guided: File | str | None = shell.arg( + default=None, + help="as with creating surfaces, when also feeding in non-brain-extracted T2 (includes registrations)", + argstr="-A2", + ) + functional: bool = shell.arg( + default=False, + argstr="-F", + help="apply to 4D fMRI data", + ) + reduce_bias: bool = shell.arg( + default=False, + argstr="-B", + help="bias field and neck cleanup", + ) + + class Outputs(ShellOutputs): + out_file: File = shell.outarg( + help="name of output skull stripped image", + position=2, + argstr="", + path_template="{in_file}_brain", + ) + + # ("number_classes", int, attr.ib(metadata={help='number of tissue-type classes', argstr='-n', + # allowed_values={"min_val": 1, max_val=10}})), + # ("output_biasfield", bool, + # attr.ib(metadata={help='output estimated bias field', argstr='-b'})), + # ("output_biascorrected", bool, + # attr.ib(metadata={help='output restored image (bias-corrected image)', argstr='-B'})), # TODO: not sure why this has to be string in_file = data_tests_dir / "test.nii.gz" # separate command into exec + args - shelly = ShellCommandTask( - name="bet_task", executable="bet", in_file=in_file, input_spec=bet_input_spec - ) - out_file = shelly.output_dir / "test_brain.nii.gz" - assert shelly.inputs.executable == "bet" - assert shelly.cmdline == f"bet {in_file} {out_file}" - # res = 
shelly(plugin="cf") + shelly = Bet(in_file=File.mock("/path/to/nifti.nii.gz")) + assert shelly.executable == "bet" + try: + orig_dir = os.getcwd() + os.chdir(tmp_path) + assert ( + shelly.cmdline == f"bet /path/to/nifti.nii.gz {tmp_path}/nifti_brain.nii.gz" + ) + finally: + os.chdir(orig_dir) + shelly = Bet(in_file=in_file) + outputs = shelly(cache_dir=tmp_path) + assert outputs.out_file.name == "test_brain.nii.gz" def test_shell_cmd_optional_output_file1(tmp_path): """ Test to see that 'unused' doesn't complain about not having an output passed to it """ - my_cp_spec = SpecInfo( - name="Input", - fields=[ - ( - "input", - attr.ib( - type=File, metadata={"argstr": "", "help_string": "input file"} - ), - ), - ( - "output", - attr.ib( - type=Path, - metadata={ - "argstr": "", - "output_file_template": "out.txt", - "help_string": "output file", - }, - ), - ), - ( - "unused", - attr.ib( - type=ty.Union[Path, bool], - default=False, - metadata={ - "argstr": "--not-used", - "output_file_template": "out.txt", - "help_string": "dummy output", - }, - ), - ), - ], - bases=(ShellSpec,), - ) - my_cp = ShellCommandTask( - name="my_cp", - executable="cp", - input_spec=my_cp_spec, - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + input: File = shell.arg(argstr="", help="input file") + + executable = "cp" + + class Outputs(ShellOutputs): + output: File = shell.outarg( + argstr="", + path_template="out.txt", + help="output file", + ) + unused: File | None = shell.outarg( + default=None, + argstr="--not-used", + path_template="out.txt", + help="dummy output", + ) + file1 = tmp_path / "file1.txt" file1.write_text("foo") - result = my_cp(input=file1, unused=False) - assert result.output.output.fspath.read_text() == "foo" + my_cp = Shelly(input=file1, unused=False) + outputs = my_cp(cache_dir=tmp_path) + assert outputs.output.fspath.read_text() == "foo" def test_shell_cmd_optional_output_file2(tmp_path): """ Test to see that 'unused' doesn't complain about not having an output passed to it """ - my_cp_spec = SpecInfo( - name="Input", - fields=[ - ( - "input", - attr.ib( - type=File, metadata={"argstr": "", "help_string": "input file"} - ), - ), - ( - "output", - attr.ib( - type=ty.Union[Path, bool], - default=False, - metadata={ - "argstr": "", - "output_file_template": "out.txt", - "help_string": "dummy output", - }, - ), - ), - ], - bases=(ShellSpec,), - ) - my_cp = ShellCommandTask( - name="my_cp", - executable="cp", - input_spec=my_cp_spec, - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "cp" + + input: File = shell.arg(argstr="", help="input file") + + class Outputs(ShellOutputs): + output: File | None = shell.outarg( + argstr="", + path_template="out.txt", + help="dummy output", + ) + file1 = tmp_path / "file1.txt" file1.write_text("foo") - result = my_cp(input=file1, output=True) - assert result.output.output.fspath.read_text() == "foo" + my_cp = Shelly(input=file1, output=True) + outputs = my_cp(cache_dir=tmp_path) + assert outputs.output.fspath.read_text() == "foo" file2 = tmp_path / "file2.txt" file2.write_text("bar") + my_cp2 = Shelly(input=file2, output=False) with pytest.raises(RuntimeError): - my_cp(input=file2, output=False) + my_cp2() def test_shell_cmd_non_existing_outputs_1(tmp_path): """Checking that non existing output files do not return a phantom path, - but return NOTHING instead""" - input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out_name", - attr.ib( - type=str, - metadata={ - "help_string": """ - base name of the pretend 
outputs. - """, - "mandatory": True, - }, - ), + but return None instead""" + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "echo" + out_name: str = shell.arg( + help=""" + base name of the pretend outputs. + """, + ) + + class Outputs(ShellOutputs): + out_1: File | None = shell.out( + help="fictional output #1", + callable=lambda: "out_1.nii", + ) + out_2: File | None = shell.out( + help="fictional output #2", + callable=lambda: "out_2.nii", ) - ], - bases=(ShellSpec,), - ) - out_spec = SpecInfo( - name="Output", - fields=[ - ( - "out_1", - attr.ib( - type=File, - metadata={ - "help_string": "fictional output #1", - "output_file_template": "{out_name}_1.nii", - }, - ), - ), - ( - "out_2", - attr.ib( - type=File, - metadata={ - "help_string": "fictional output #2", - "output_file_template": "{out_name}_2.nii", - }, - ), - ), - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - cache_dir=tmp_path, - executable="echo", - input_spec=input_spec, - output_spec=out_spec, - out_name="test", - ) - shelly() - res = shelly.result() - assert res.output.out_1 == attr.NOTHING and res.output.out_2 == attr.NOTHING + shelly = Shelly(out_name="test") + outputs = shelly(cache_dir=tmp_path) + assert outputs.out_1 is None + assert outputs.out_2 is None def test_shell_cmd_non_existing_outputs_2(tmp_path): """Checking that non existing output files do not return a phantom path, - but return NOTHING instead. This test has one existing and one non existing output file. + but return None instead. This test has one existing and one non existing output file. """ - input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out_name", - attr.ib( - type=str, - metadata={ - "help_string": """ - base name of the pretend outputs. - """, - "mandatory": True, - "argstr": "{out_name}_1.nii", - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "touch" + out_name: str = shell.arg( + help=""" + base name of the pretend outputs. + """, + argstr="{out_name}_1.nii", + ) + + class Outputs(ShellOutputs): + out_1: File = shell.outarg( + help="fictional output #1", + path_template="{out_name}_1.nii", + ) + out_2: File | None = shell.outarg( + help="fictional output #2", + path_template="{out_name}_2.nii", ) - ], - bases=(ShellSpec,), - ) - out_spec = SpecInfo( - name="Output", - fields=[ - ( - "out_1", - attr.ib( - type=File, - metadata={ - "help_string": "fictional output #1", - "output_file_template": "{out_name}_1.nii", - }, - ), - ), - ( - "out_2", - attr.ib( - type=File, - metadata={ - "help_string": "fictional output #2", - "output_file_template": "{out_name}_2.nii", - }, - ), - ), - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - cache_dir=tmp_path, - executable="touch", - input_spec=input_spec, - output_spec=out_spec, - out_name="test", - ) - shelly() - res = shelly.result() + shelly = Shelly(out_name="test") + outputs = shelly(cache_dir=tmp_path) # the first output file is created - assert res.output.out_1.fspath == Path(shelly.output_dir) / Path("test_1.nii") - assert res.output.out_1.fspath.exists() + assert outputs.out_1.fspath == next(tmp_path.iterdir()) / "test_1.nii" + assert outputs.out_1.fspath.exists() # the second output file is not created - assert res.output.out_2 == attr.NOTHING + assert outputs.out_2 is None def test_shell_cmd_non_existing_outputs_3(tmp_path): """Checking that non existing output files do not return a phantom path, - but return NOTHING instead. 
This test has an existing mandatory output and another non existing output file. - """ - input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out_name", - attr.ib( - type=str, - metadata={ - "help_string": """ - base name of the pretend outputs. - """, - "mandatory": True, - "argstr": "{out_name}_1.nii", - }, - ), + but return None instead. This test has an existing mandatory output and another + non existing output file. + """ + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "touch" + out_name: str = shell.arg( + help=""" + base name of the pretend outputs. + """, + argstr=None, + ) + + class Outputs(ShellOutputs): + out_1: File = shell.outarg( + help="real output #1", + path_template="{out_name}_1.nii", + ) + out_2: File | None = shell.out( + help="fictional output #2", + callable=lambda out_name: f"{out_name}_2.nii", ) - ], - bases=(ShellSpec,), - ) - out_spec = SpecInfo( - name="Output", - fields=[ - ( - "out_1", - attr.ib( - type=File, - metadata={ - "help_string": "fictional output #1", - "output_file_template": "{out_name}_1.nii", - "mandatory": True, - }, - ), - ), - ( - "out_2", - attr.ib( - type=File, - metadata={ - "help_string": "fictional output #2", - "output_file_template": "{out_name}_2.nii", - }, - ), - ), - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - cache_dir=tmp_path, - executable="touch", - input_spec=input_spec, - output_spec=out_spec, - out_name="test", - ) - shelly() - res = shelly.result() + shelly = Shelly(out_name="test") + + outputs = shelly(cache_dir=tmp_path) # the first output file is created - assert res.output.out_1.fspath == Path(shelly.output_dir) / Path("test_1.nii") - assert res.output.out_1.fspath.exists() + assert outputs.out_1.fspath == next(tmp_path.iterdir()) / "test_1.nii" + assert outputs.out_1.fspath.exists() # the second output file is not created - assert res.output.out_2 == attr.NOTHING + assert outputs.out_2 is None def test_shell_cmd_non_existing_outputs_4(tmp_path): """Checking that non existing output files do not return a phantom path, - but return NOTHING instead. This test has an existing mandatory output and another non existing + but return None instead. This test has an existing mandatory output and another non existing mandatory output file.""" - input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out_name", - attr.ib( - type=str, - metadata={ - "help_string": """ - base name of the pretend outputs. 
- """, - "mandatory": True, - "argstr": "{out_name}_1.nii", - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "touch" + out_name: str = shell.arg( + help="""base name of the pretend outputs.""", + argstr="{out_name}_1.nii", + ) + + class Outputs(ShellOutputs): + out_1: File = shell.out( + help="real output #1", + callable=lambda out_name: f"{out_name}_1.nii", + ) + out_2: File = shell.out( + help="fictional output #2", + callable=lambda out_name: f"{out_name}_2.nii", ) - ], - bases=(ShellSpec,), - ) - out_spec = SpecInfo( - name="Output", - fields=[ - ( - "out_1", - attr.ib( - type=File, - metadata={ - "help_string": "fictional output #1", - "output_file_template": "{out_name}_1.nii", - "mandatory": True, - }, - ), - ), - ( - "out_2", - attr.ib( - type=File, - metadata={ - "help_string": "fictional output #2", - "output_file_template": "{out_name}_2.nii", - "mandatory": True, - }, - ), - ), - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - cache_dir=tmp_path, - executable="touch", - input_spec=input_spec, - output_spec=out_spec, - out_name="test", - ) + shelly = Shelly(out_name="test") # An exception should be raised because the second mandatory output does not exist - with pytest.raises(Exception) as excinfo: - shelly() - assert "mandatory output for variable out_2 does not exist" == str(excinfo.value) + with pytest.raises( + ValueError, + match=r"file system path\(s\) provided to mandatory field .* does not exist", + ): + shelly(cache_dir=tmp_path) # checking if the first output was created - assert (Path(shelly.output_dir) / Path("test_1.nii")).exists() + assert (next(tmp_path.iterdir()) / "test_1.nii").exists() def test_shell_cmd_non_existing_outputs_multi_1(tmp_path): """This test looks if non existing files of an multiOuputFile are also set to NOTHING""" - input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out_name", - attr.ib( - type=MultiInputObj, - metadata={ - "help_string": """ + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "echo" + out_name: MultiInputObj = shell.arg( + help=""" base name of the pretend outputs. """, - "mandatory": True, - "argstr": "...", - }, - ), + argstr="...", + ) + + class Outputs(ShellOutputs): + out_list: MultiOutputFile | None = shell.out( + help="fictional output #1", + callable=lambda out_name: out_name, ) - ], - bases=(ShellSpec,), - ) - out_spec = SpecInfo( - name="Output", - fields=[ - ( - "out_list", - attr.ib( - type=MultiOutputFile, - metadata={ - "help_string": "fictional output #1", - "output_file_template": "{out_name}", - }, - ), - ), - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - cache_dir=tmp_path, - executable="echo", - input_spec=input_spec, - output_spec=out_spec, - out_name=["test_1.nii", "test_2.nii"], - ) - shelly() - res = shelly.result() - # checking if the outputs are Nothing - assert res.output.out_list[0] == attr.NOTHING - assert res.output.out_list[1] == attr.NOTHING + shelly = Shelly(out_name=["test_1.nii", "test_2.nii"]) + + # with pytest.raises(ValueError): + outputs = shelly(cache_dir=tmp_path) + assert outputs.out_list == None def test_shell_cmd_non_existing_outputs_multi_2(tmp_path): """This test looks if non existing files of an multiOutputFile are also set to NOTHING. 
It checks that it also works if one file of the multiOutputFile actually exists.""" - input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out_name", - attr.ib( - type=MultiInputObj, - metadata={ - "help_string": """ - base name of the pretend outputs. - """, - "sep": " test_1_real.nii", # hacky way of creating an extra file with that name - "mandatory": True, - "argstr": "...", - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "touch" + out_name: MultiInputObj = shell.arg( + help="""base name of the pretend outputs.""", + sep=" test_1_real.nii", # hacky way of creating an extra file with that name + argstr="...", + ) + + class Outputs(ShellOutputs): + out_list: MultiOutputFile | None = shell.out( + help="fictional output #1", + callable=lambda out_name: f"{out_name}_real.nii", ) - ], - bases=(ShellSpec,), - ) - out_spec = SpecInfo( - name="Output", - fields=[ - ( - "out_list", - attr.ib( - type=MultiOutputFile, - metadata={ - "help_string": "fictional output #1", - "output_file_template": "{out_name}_real.nii", - }, - ), - ), - ], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - cache_dir=tmp_path, - executable="touch", - input_spec=input_spec, - output_spec=out_spec, - out_name=["test_1", "test_2"], - ) - shelly() - res = shelly.result() - # checking if the outputs are Nothing - assert res.output.out_list[0] == File(Path(shelly.output_dir) / "test_1_real.nii") - assert res.output.out_list[1] == attr.NOTHING + shelly = Shelly(out_name=["test_1", "test_2"]) + + outputs = shelly(cache_dir=tmp_path) + # checking if the outputs is None + assert outputs.out_list is None -@pytest.mark.xfail( - reason=( - "Not sure what the desired behaviour for formatter 5 is. Field is declared as a list " - "but a string containing the formatted arg is passed instead." - ) -) def test_shellspec_formatter_1(tmp_path): """test the input callable 'formatter'.""" - def spec_info(formatter): - return SpecInfo( - name="Input", - fields=[ - ( - "in1", - attr.ib( - type=str, - metadata={ - "help_string": """ - just a dummy name - """, - "mandatory": True, - }, - ), - ), - ( - "in2", - attr.ib( - type=str, - metadata={ - "help_string": """ - just a dummy name - """, - "mandatory": True, - }, - ), - ), - ( - "together", - attr.ib( - type=ty.List, - metadata={ - "help_string": """ - combines in1 and in2 into a list - """, - # When providing a formatter all other metadata options are discarded. - "formatter": formatter, - }, - ), - ), - ], - bases=(ShellSpec,), - ) - def formatter_1(inputs): print("FORMATTER:", inputs) return f"-t [{inputs['in1']}, {inputs['in2']}]" - input_spec = spec_info(formatter_1) - shelly = ShellCommandTask( - executable="exec", input_spec=input_spec, in1="i1", in2="i2" - ) + def make_shelly(formatter): + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "exec" + in1: str = shell.arg( + argstr=None, + help="""just a dummy name""", + ) + in2: str = shell.arg( + argstr=None, + help="""just a dummy name""", + ) + + together: ty.List = shell.arg( + default=attrs.Factory(list), + help="""combines in1 and in2 into a list""", + # When providing a formatter all other metadata options are discarded. + formatter=formatter, + ) + + class Outputs(ShellOutputs): + pass + + return Shelly + + Shelly = make_shelly(formatter=formatter_1) + shelly = Shelly(in1="i1", in2="i2") assert shelly.cmdline == "exec -t [i1, i2]" # testing that the formatter can overwrite a provided value for together. 
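A standalone sketch of the formatter pattern introduced in this hunk may help; it assumes the new-style `pydra.design.shell` API exactly as used throughout this patch, and `Sketch` / `list_formatter` are illustrative names rather than part of the change (the override of an explicitly provided `together` value is exercised in the lines that follow).

import typing as ty

import attrs
from pydra.design import shell
from pydra.engine.specs import ShellDef, ShellOutputs


def list_formatter(in1, in2):
    # the returned string is inserted verbatim into the command line
    return f"-t [{in1}, {in2}]"


@shell.define
class Sketch(ShellDef["Sketch.Outputs"]):
    executable = "exec"
    in1: str = shell.arg(argstr=None, help="first dummy input")
    in2: str = shell.arg(argstr=None, help="second dummy input")
    # when a formatter is supplied it alone decides how this field is rendered
    together: ty.List = shell.arg(
        default=attrs.Factory(list),
        help="combines in1 and in2 into a list",
        formatter=list_formatter,
    )

    class Outputs(ShellOutputs):
        pass


assert Sketch(in1="i1", in2="i2").cmdline == "exec -t [i1, i2]"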
- shelly = ShellCommandTask( - executable="exec", - input_spec=input_spec, - in1="i1", - in2="i2", - together=[1], - ) + shelly = Shelly(in1="i1", in2="i2", together=[1]) assert shelly.cmdline == "exec -t [i1, i2]" # asking for specific inputs @@ -4888,111 +3425,67 @@ def formatter_2(in1, in2): print("FORMATTER:", in1, in2) return f"-t [{in1}, {in2}]" - input_spec = spec_info(formatter_2) + Shelly = make_shelly(formatter_2) - shelly = ShellCommandTask( - executable="exec", input_spec=input_spec, in1="i1", in2="i2" - ) + shelly = Shelly(in1="i1", in2="i2") assert shelly.cmdline == "exec -t [i1, i2]" def formatter_3(in1, in3): print("FORMATTER:", in1, in3) return f"-t [{in1}, {in3}]" - input_spec = spec_info(formatter_3) + Shelly = make_shelly(formatter_3) - shelly = ShellCommandTask( - executable="exec", input_spec=input_spec, in1="i1", in2="i2" - ) + shelly = Shelly(in1="i1", in2="i2") with pytest.raises(Exception) as excinfo: shelly.cmdline assert ( - "arguments of the formatter function from together has to be in inputs or be field or output_dir, but in3 is used" + "arguments of the formatter function from together has to be in inputs or be field, but in3 is used" == str(excinfo.value) ) # checking if field value is accessible when None - def formatter_5(field): - assert field == "-t test" + def formatter_4(field): + assert isinstance(field, shell.arg) # formatter must return a string - return field + return "-t test" - input_spec = spec_info(formatter_5) + Shelly = make_shelly(formatter_4) - shelly = ShellCommandTask( - executable="exec", - input_spec=input_spec, + shelly = Shelly( in1="i1", in2="i2", # together="-t test", ) assert shelly.cmdline == "exec -t test" - # checking if field value is accessible when None - def formatter_4(field): - assert field is None - # formatter must return a string - return "" - - input_spec = spec_info(formatter_4) - - shelly = ShellCommandTask( - executable="exec", input_spec=input_spec, in1="i1", in2="i2" - ) - assert shelly.cmdline == "exec" - def test_shellspec_formatter_splitter_2(tmp_path): """test the input callable 'formatter' when a splitter is used on an argument of the formatter.""" - def spec_info(formatter): - return SpecInfo( - name="Input", - fields=[ - ( - "in1", - attr.ib( - type=str, - metadata={ - "help_string": "in1", - }, - ), - ), - ( - "in2", - attr.ib( - type=str, - metadata={ - "help_string": "in2", - }, - ), - ), - ( - "together", - attr.ib( - type=ty.List, - metadata={ - "help_string": """ - uses in1 - """, - # When providing a formatter all other metadata options are discarded. - "formatter": formatter, - }, - ), - ), - ], - bases=(ShellSpec,), - ) - # asking for specific inputs def formatter_1(in1, in2): return f"-t [{in1} {in2}]" - input_spec = spec_info(formatter_1) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "executable" + in1: str = shell.arg(help="in1") + in2: str = shell.arg(help="in2") + together: ty.List = shell.arg( + help=""" + uses in1 + """, + # When providing a formatter all other metadata options are discarded. 
+ formatter=formatter_1, + sep=" ", + ) + + class Outputs(ShellOutputs): + pass + in1 = ["in11", "in12"] - shelly = ShellCommandTask( - name="f", executable="executable", input_spec=input_spec, in2="in2" - ).split("in1", in1=in1) + shelly = Shelly(in2="in2").split("in1", in1=in1) assert shelly is not None # results = shelly.cmdline @@ -5025,30 +3518,26 @@ def test_shellcommand_error_msg(tmp_path): ), ) - input_spec = SpecInfo( - name="Input", - fields=[ - ( - "in1", - str, - {"help_string": "a dummy string", "argstr": "", "mandatory": True}, - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): - shelly = ShellCommandTask( - name="err_msg", executable=str(script_path), input_spec=input_spec, in1="hello" - ) + executable = str(script_path) + + in1: str = shell.arg(help="a dummy string", argstr="") + + class Outputs(ShellOutputs): + pass + + shelly = Shelly(in1="hello") with pytest.raises(RuntimeError) as excinfo: - shelly() + shelly(cache_dir=tmp_path) path_str = str(script_path) assert ( str(excinfo.value) - == f"""Error running 'err_msg' task with ['{path_str}', 'hello']: + == f"""Error running 'main' task with ['{path_str}', 'hello']: stderr: {path_str}: line 3: /command-that-doesnt-exist: No such file or directory diff --git a/pydra/engine/tests/test_shelltask_inputspec.py b/pydra/engine/tests/test_shelltask_inputspec.py index 9bc7f7a232..413469d1f7 100644 --- a/pydra/engine/tests/test_shelltask_inputspec.py +++ b/pydra/engine/tests/test_shelltask_inputspec.py @@ -1,65 +1,60 @@ import typing as ty from pathlib import Path -import attr +import attrs import pytest - -from ..task import ShellCommandTask -from ..specs import ( - ShellOutSpec, - ShellSpec, - SpecInfo, - File, - MultiInputObj, -) +from pydra.engine.specs import ShellOutputs, ShellDef +from fileformats.generic import File +from pydra.design import shell +from pydra.utils.typing import MultiInputObj +from .utils import get_output_names def test_shell_cmd_execargs_1(): # separate command into exec + args - shelly = ShellCommandTask(executable="executable", args="arg") + Shelly = shell.define(["executable", "arg"]) + shelly = Shelly() assert shelly.cmdline == "executable arg" - assert shelly.name == "ShellTask_noname" def test_shell_cmd_execargs_2(): # separate command into exec + args - shelly = ShellCommandTask(executable=["cmd_1", "cmd_2"], args="arg") + Shelly = shell.define(["cmd_1", "cmd_2", "arg"]) + shelly = Shelly() assert shelly.cmdline == "cmd_1 cmd_2 arg" def test_shell_cmd_inputs_1(): """additional input with provided position""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={"position": 1, "help_string": "inp1", "argstr": ""}, - ), - ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", args="arg", inpA="inp1", input_spec=my_input_spec + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + inpA: str = shell.arg(position=1, help="inp1", argstr="") + + shelly = Shelly( + additional_args=["arg"], + inpA="inp1", ) assert shelly.cmdline == "executable inp1 arg" def test_shell_cmd_inputs_1a(): """additional input without provided position""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ("inpA", attr.ib(type=str, metadata={"help_string": "inpA", "argstr": ""})) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", args="arg", inpA="inpNone1", input_spec=my_input_spec + 
@shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + inpA: str = shell.arg(help="inpA", argstr="") + + shelly = Shelly( + additional_args=["arg"], + inpA="inpNone1", ) # inp1 should be the first one after executable assert shelly.cmdline == "executable inpNone1 arg" @@ -67,200 +62,104 @@ def test_shell_cmd_inputs_1a(): def test_shell_cmd_inputs_1b(): """additional input with negative position""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={"position": -1, "help_string": "inpA", "argstr": ""}, - ), - ) - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + inpA: str = shell.arg(position=-1, help="inpA", argstr="") # separate command into exec + args - shelly = ShellCommandTask( - executable="executable", args="arg", inpA="inp-1", input_spec=my_input_spec + shelly = Shelly( + additional_args=["arg"], + inpA="inp-1", ) # inp1 should be last before arg assert shelly.cmdline == "executable inp-1 arg" -def test_shell_cmd_inputs_1_st(): - """additional input with provided position, checking cmdline when splitter""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={"position": 1, "help_string": "inp1", "argstr": ""}, - ), - ) - ], - bases=(ShellSpec,), - ) +def test_shell_cmd_inputs_2(): + """additional inputs with provided positions""" - ShellCommandTask( - name="shelly", - executable="executable", - args="arg", - input_spec=my_input_spec, - ).split("inpA", inpA=["inp1", "inp2"]) - # cmdline should be a list - # assert shelly.cmdline[0] == "executable inp1 arg" - # assert shelly.cmdline[1] == "executable inp2 arg" + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + executable = "executable" -def test_shell_cmd_inputs_2(): - """additional inputs with provided positions""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={"position": 2, "help_string": "inpA", "argstr": ""}, - ), - ), - ( - "inpB", - attr.ib( - type=str, - metadata={"position": 1, "help_string": "inpN", "argstr": ""}, - ), - ), - ], - bases=(ShellSpec,), - ) + inpA: str = shell.arg(position=2, help="inpA", argstr="") + inpB: str = shell.arg(position=1, help="inpN", argstr="") # separate command into exec + args - shelly = ShellCommandTask( - executable="executable", inpB="inp1", inpA="inp2", input_spec=my_input_spec + shelly = Shelly( + inpB="inp1", + inpA="inp2", ) assert shelly.cmdline == "executable inp1 inp2" def test_shell_cmd_inputs_2a(): """additional inputs without provided positions""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ("inpA", attr.ib(type=str, metadata={"help_string": "inpA", "argstr": ""})), - ("inpB", attr.ib(type=str, metadata={"help_string": "inpB", "argstr": ""})), - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = shell.arg(help="inpA", argstr="") + inpB: str = shell.arg(help="inpB", argstr="") # separate command into exec + args - shelly = ShellCommandTask( - executable="executable", + shelly = Shelly( inpA="inpNone1", inpB="inpNone2", - input_spec=my_input_spec, ) - # position taken from the order in input spec + # position taken from the order in input definition assert 
shelly.cmdline == "executable inpNone1 inpNone2" def test_shell_cmd_inputs_2_err(): """additional inputs with provided positions (exception due to the duplication)""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={"position": 1, "help_string": "inpA", "argstr": ""}, - ), - ), - ( - "inpB", - attr.ib( - type=str, - metadata={"position": 1, "help_string": "inpB", "argstr": ""}, - ), - ), - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", inpA="inp1", inpB="inp2", input_spec=my_input_spec - ) with pytest.raises(Exception) as e: - shelly.cmdline - assert "1 is already used" in str(e.value) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass -def test_shell_cmd_inputs_2_noerr(): - """additional inputs with provided positions - (duplication of the position doesn't lead to error, since only one field has value) - """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={"position": 1, "help_string": "inpA", "argstr": ""}, - ), - ), - ( - "inpB", - attr.ib( - type=str, - metadata={"position": 1, "help_string": "inpB", "argstr": ""}, - ), - ), - ], - bases=(ShellSpec,), - ) + executable = "executable" - shelly = ShellCommandTask( - executable="executable", inpA="inp1", input_spec=my_input_spec - ) - shelly.cmdline + inpA: str = shell.arg(position=1, help="inpA", argstr="") + inpB: str = shell.arg(position=1, help="inpB", argstr="") + + assert "Multiple fields have the overlapping positions" in str(e.value) def test_shell_cmd_inputs_3(): """additional inputs: positive pos, negative pos and no pos""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={"position": 1, "help_string": "inpA", "argstr": ""}, - ), - ), - ( - "inpB", - attr.ib( - type=str, - metadata={"position": -1, "help_string": "inpB", "argstr": ""}, - ), - ), - ("inpC", attr.ib(type=str, metadata={"help_string": "inpC", "argstr": ""})), - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = shell.arg(position=1, help="inpA", argstr="") + inpB: str = shell.arg(position=-1, help="inpB", argstr="") + inpC: str = shell.arg(help="inpC", argstr="") # separate command into exec + args - shelly = ShellCommandTask( - executable="executable", + shelly = Shelly( inpA="inp1", inpB="inp-1", inpC="inpNone", - input_spec=my_input_spec, ) # input without position should be between positive an negative positions assert shelly.cmdline == "executable inp1 inpNone inp-1" @@ -268,1344 +167,925 @@ def test_shell_cmd_inputs_3(): def test_shell_cmd_inputs_argstr_1(): """additional string inputs with argstr""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={"position": 1, "help_string": "inpA", "argstr": "-v"}, - ), - ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", inpA="inp1", input_spec=my_input_spec - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = shell.arg(position=1, help="inpA", argstr="-v") + + shelly = Shelly(inpA="inp1") # flag used before inp1 assert shelly.cmdline == "executable -v inp1" def test_shell_cmd_inputs_argstr_2(): """additional bool inputs with argstr""" - my_input_spec = SpecInfo( - 
name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=bool, - metadata={"position": 1, "help_string": "inpA", "argstr": "-v"}, - ), - ) - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: bool = shell.arg(position=1, help="inpA", argstr="-v") # separate command into exec + args - shelly = ShellCommandTask( - executable="executable", args="arg", inpA=True, input_spec=my_input_spec - ) + shelly = Shelly(additional_args=["arg"], inpA=True) # a flag is used without any additional argument assert shelly.cmdline == "executable -v arg" def test_shell_cmd_inputs_list_1(): """providing list as an additional input, no sep, no argstr""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=ty.List[str], - metadata={"position": 2, "help_string": "inpA", "argstr": ""}, - ), - ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", inpA=["el_1", "el_2", "el_3"], input_spec=my_input_spec - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: ty.List[str] = shell.arg(position=2, help="inpA", argstr="", sep=" ") + + shelly = Shelly(inpA=["el_1", "el_2", "el_3"]) # multiple elements assert shelly.cmdline == "executable el_1 el_2 el_3" def test_shell_cmd_inputs_list_2(): """providing list as an additional input, no sep, but argstr""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=ty.List[str], - metadata={"position": 2, "help_string": "inpA", "argstr": "-v"}, - ), - ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", inpA=["el_1", "el_2", "el_3"], input_spec=my_input_spec - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: ty.List[str] = shell.arg(position=2, help="inpA", argstr="-v", sep=" ") + + shelly = Shelly(inpA=["el_1", "el_2", "el_3"]) assert shelly.cmdline == "executable -v el_1 el_2 el_3" def test_shell_cmd_inputs_list_3(): """providing list as an additional input, no sep, argstr with ...""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=ty.List[str], - metadata={"position": 2, "help_string": "inpA", "argstr": "-v..."}, - ), - ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", inpA=["el_1", "el_2", "el_3"], input_spec=my_input_spec - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: ty.List[str] = shell.arg(position=2, help="inpA", argstr="-v...", sep=" ") + + shelly = Shelly(inpA=["el_1", "el_2", "el_3"]) # a flag is repeated assert shelly.cmdline == "executable -v el_1 -v el_2 -v el_3" def test_shell_cmd_inputs_list_sep_1(): """providing list as an additional input:, sep, no argstr""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=MultiInputObj[str], - metadata={ - "position": 1, - "help_string": "inpA", - "sep": ",", - "argstr": "", - }, - ), - ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", - inpA=["aaa", "bbb", "ccc"], - input_spec=my_input_spec, - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: list[str] = shell.arg( + 
position=1, + help="inpA", + sep=",", + argstr="", + ) + + shelly = Shelly(inpA=["aaa", "bbb", "ccc"]) # separated by commas assert shelly.cmdline == "executable aaa,bbb,ccc" def test_shell_cmd_inputs_list_sep_2(): """providing list as an additional input:, sep, and argstr""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=MultiInputObj[str], - metadata={ - "position": 1, - "help_string": "inpA", - "sep": ",", - "argstr": "-v", - }, - ), - ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", - inpA=["aaa", "bbb", "ccc"], - input_spec=my_input_spec, - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: list[str] = shell.arg( + position=1, + help="inpA", + sep=",", + argstr="-v", + ) + + shelly = Shelly(inpA=["aaa", "bbb", "ccc"]) # a flag is used once assert shelly.cmdline == "executable -v aaa,bbb,ccc" def test_shell_cmd_inputs_list_sep_2a(): """providing list as an additional input:, sep, and argstr with f-string""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=MultiInputObj[str], - metadata={ - "position": 1, - "help_string": "inpA", - "sep": ",", - "argstr": "-v {inpA}", - }, - ), - ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", - inpA=["aaa", "bbb", "ccc"], - input_spec=my_input_spec, - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: list[str] = shell.arg( + position=1, + help="inpA", + sep=",", + argstr="-v {inpA}", + ) + + shelly = Shelly(inpA=["aaa", "bbb", "ccc"]) # a flag is used once assert shelly.cmdline == "executable -v aaa,bbb,ccc" def test_shell_cmd_inputs_list_sep_3(): """providing list as an additional input:, sep, argstr with ...""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=MultiInputObj[str], - metadata={ - "position": 1, - "help_string": "inpA", - "sep": ",", - "argstr": "-v...", - }, - ), - ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", - inpA=["aaa", "bbb", "ccc"], - input_spec=my_input_spec, - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: list[str] = shell.arg( + position=1, + help="inpA", + sep=",", + argstr="-v...", + ) + + shelly = Shelly(inpA=["aaa", "bbb", "ccc"]) # a flag is repeated assert shelly.cmdline == "executable -v aaa, -v bbb, -v ccc" def test_shell_cmd_inputs_list_sep_3a(): """providing list as an additional input:, sep, argstr with ... 
and f-string""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=MultiInputObj[str], - metadata={ - "position": 1, - "help_string": "inpA", - "sep": ",", - "argstr": "-v {inpA}...", - }, - ), - ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", - inpA=["aaa", "bbb", "ccc"], - input_spec=my_input_spec, - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: list[str] = shell.arg( + position=1, + help="inpA", + sep=",", + argstr="-v {inpA}...", + ) + + shelly = Shelly(inpA=["aaa", "bbb", "ccc"]) # a flag is repeated assert shelly.cmdline == "executable -v aaa, -v bbb, -v ccc" def test_shell_cmd_inputs_sep_4(): """providing 1-el list as an additional input:, sep, argstr with ...,""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=MultiInputObj[str], - metadata={ - "position": 1, - "help_string": "inpA", - "sep": ",", - "argstr": "-v...", - }, - ), - ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", inpA=["aaa"], input_spec=my_input_spec - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: MultiInputObj[str] = shell.arg( + position=1, + help="inpA", + argstr="-v...", + ) + + shelly = Shelly(inpA=["aaa"]) assert shelly.cmdline == "executable -v aaa" def test_shell_cmd_inputs_sep_4a(): """providing str instead of list as an additional input:, sep, argstr with ...""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "inpA", - "sep": ",", - "argstr": "-v...", - }, - ), - ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", inpA="aaa", input_spec=my_input_spec - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = shell.arg( + position=1, + help="inpA", + argstr="-v...", + ) + + shelly = Shelly(inpA="aaa") assert shelly.cmdline == "executable -v aaa" def test_shell_cmd_inputs_format_1(): """additional inputs with argstr that has string formatting""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "-v {inpA}", - }, - ), - ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", inpA="aaa", input_spec=my_input_spec - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = shell.arg( + position=1, + help="inpA", + argstr="-v {inpA}", + ) + + shelly = Shelly(inpA="aaa") assert shelly.cmdline == "executable -v aaa" def test_shell_cmd_inputs_format_2(): """additional inputs with argstr that has string formatting and ...""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=MultiInputObj[str], - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "-v {inpA}...", - }, - ), - ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", - inpA=["el_1", "el_2"], - input_spec=my_input_spec, - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: MultiInputObj[str] = shell.arg( + 
position=1, + help="inpA", + argstr="-v {inpA}...", + ) + + shelly = Shelly(inpA=["el_1", "el_2"]) assert shelly.cmdline == "executable -v el_1 -v el_2" def test_shell_cmd_inputs_format_3(): """adding float formatting for argstr with input field""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=float, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "-v {inpA:.5f}", - }, - ), - ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", inpA=0.007, input_spec=my_input_spec - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: float = shell.arg( + position=1, + help="inpA", + argstr="-v {inpA:.5f}", + ) + + shelly = Shelly(inpA=0.007) assert shelly.cmdline == "executable -v 0.00700" def test_shell_cmd_inputs_mandatory_1(): """additional inputs with mandatory=True""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask(executable="executable", input_spec=my_input_spec) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = shell.arg( + position=1, + help="inpA", + argstr="", + ) + + shelly = Shelly() with pytest.raises(Exception) as e: shelly.cmdline - assert "mandatory" in str(e.value) + assert "mandatory" in str(e.value).lower() def test_shell_cmd_inputs_not_given_1(): - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "arg1", - attr.ib( - type=MultiInputObj, - metadata={ - "argstr": "--arg1", - "help_string": "Command line argument 1", - }, - ), - ), - ( - "arg2", - attr.ib( - type=MultiInputObj, - metadata={ - "argstr": "--arg2", - "help_string": "Command line argument 2", - }, - ), - ), - ( - "arg3", - attr.ib( - type=File, - metadata={ - "argstr": "--arg3", - "help_string": "Command line argument 3", - }, - ), - ), - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - name="shelly", executable="executable", input_spec=my_input_spec - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + arg1: MultiInputObj = shell.arg( + argstr="--arg1", + default=attrs.Factory(list), + help="Command line argument 1", + ) + arg2: MultiInputObj = shell.arg( + argstr="--arg2", + help="Command line argument 2", + ) + arg3: File | None = shell.arg( + argstr="--arg3", + default=None, + help="Command line argument 3", + ) + + shelly = Shelly() - shelly.inputs.arg2 = "argument2" + shelly.arg2 = "argument2" assert shelly.cmdline == "executable --arg2 argument2" def test_shell_cmd_inputs_template_1(): - """additional inputs, one uses output_file_template (and argstr)""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 2, - "help_string": "outA", - "argstr": "-o", - "output_file_template": "{inpA}_out", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + """additional inputs, one uses path_template (and argstr)""" + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + outA: File = shell.outarg( + position=2, + 
help="outA", + argstr="-o", + path_template="{inpA}_out", + ) - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA" - ) + executable = "executable" + + inpA: str = shell.arg( + position=1, + help="inpA", + argstr="", + ) + + shelly = Shelly(inpA="inpA") # outA has argstr in the metadata fields, so it's a part of the command line # the full path will be use din the command line - assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" + assert shelly.cmdline == f"executable inpA -o {Path.cwd() / 'inpA_out'}" # checking if outA in the output fields - assert shelly.output_names == ["return_code", "stdout", "stderr", "outA"] - - -def test_shell_cmd_inputs_template_1a(): - """additional inputs, one uses output_file_template (without argstr)""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "help_string": "outA", - "output_file_template": "{inpA}_out", - }, - ), - ), - ], - bases=(ShellSpec,), - ) - - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA" - ) - # outA has no argstr in metadata, so it's not a part of the command line - assert shelly.cmdline == "executable inpA" + assert get_output_names(shelly) == ["outA", "return_code", "stderr", "stdout"] # TODO: after deciding how we use requires/templates def test_shell_cmd_inputs_template_2(): - """additional inputs, one uses output_file_template (and argstr, but input not provided)""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpB", - attr.ib( - type=str, - metadata={"position": 1, "help_string": "inpB", "argstr": ""}, - ), - ), - ( - "outB", - attr.ib( - type=str, - metadata={ - "position": 2, - "help_string": "outB", - "argstr": "-o", - "output_file_template": "{inpB}_out", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + """additional inputs, one uses path_template (and argstr, but input not provided)""" + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + outB: File | None = shell.outarg( + position=2, + help="outB", + argstr="-o", + path_template="{inpB}_out", + ) + + executable = "executable" + + inpB: File | None = shell.arg(position=1, help="inpB", argstr="", default=None) - shelly = ShellCommandTask(executable="executable", input_spec=my_input_spec) + shelly = Shelly() # inpB not in the inputs, so no outB in the command line assert shelly.cmdline == "executable" # checking if outB in the output fields - assert shelly.output_names == ["return_code", "stdout", "stderr", "outB"] + assert get_output_names(shelly) == ["outB", "return_code", "stderr", "stdout"] def test_shell_cmd_inputs_template_3(tmp_path): - """additional inputs with output_file_template and an additional + """additional inputs with path_template and an additional read-only fields that combine two outputs together in the command line """ inpA = tmp_path / "inpA" inpB = tmp_path / "inpB" Path.touch(inpA) Path.touch(inpB) - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "inpB", - attr.ib( - type=str, - metadata={ - "position": 2, - "help_string": "inpB", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "outA", - attr.ib( - type=str, - metadata={ - 
"help_string": "outA", - "output_file_template": "{inpA}_out", - }, - ), - ), - ( - "outB", - attr.ib( - type=str, - metadata={ - "help_string": "outB", - "output_file_template": "{inpB}_out", - }, - ), - ), - ( - "outAB", - attr.ib( - type=str, - metadata={ - "position": -1, - "help_string": "outAB", - "argstr": "-o {outA} {outB}", - "readonly": True, - }, - ), - ), - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA=inpA, inpB=inpB - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + + outA: File = shell.outarg( + help="outA", + argstr=None, + path_template="{inpA}_out", + ) + outB: File = shell.outarg( + help="outB", + argstr=None, + path_template="{inpB}_out", + ) + + executable = "executable" + + inpA: str = shell.arg( + position=1, + help="inpA", + argstr="", + ) + inpB: str = shell.arg( + position=2, + help="inpB", + argstr="", + ) + outAB: str = shell.arg( + position=-1, + help="outAB", + argstr="-o {outA} {outB}", + readonly=True, + ) + + shelly = Shelly(inpA=inpA, inpB=inpB) # using syntax from the outAB field assert ( shelly.cmdline - == f"executable {tmp_path / 'inpA'} {tmp_path / 'inpB'} -o {shelly.output_dir / 'inpA_out'} {str(shelly.output_dir / 'inpB_out')}" + == f"executable {tmp_path / 'inpA'} {tmp_path / 'inpB'} -o {Path.cwd() / 'inpA_out'} {str(Path.cwd() / 'inpB_out')}" ) # checking if outA and outB in the output fields (outAB should not be) - assert shelly.output_names == ["return_code", "stdout", "stderr", "outA", "outB"] + assert get_output_names(shelly) == [ + "outA", + "outB", + "return_code", + "stderr", + "stdout", + ] def test_shell_cmd_inputs_template_3a(): - """additional inputs with output_file_template and an additional + """additional inputs with path_template and an additional read-only fields that combine two outputs together in the command line - testing a different order within the input spec + testing a different order within the input definition """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "inpB", - attr.ib( - type=str, - metadata={ - "position": 2, - "help_string": "inpB", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "outAB", - attr.ib( - type=str, - metadata={ - "position": -1, - "help_string": "outAB", - "argstr": "-o {outA} {outB}", - "readonly": True, - }, - ), - ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "help_string": "outA", - "output_file_template": "{inpA}_out", - }, - ), - ), - ( - "outB", - attr.ib( - type=str, - metadata={ - "help_string": "outB", - "output_file_template": "{inpB}_out", - }, - ), - ), - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA", inpB="inpB" - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + + outA: File = shell.outarg( + argstr=None, + help="outA", + path_template="{inpA}_out", + ) + outB: File = shell.outarg( + argstr=None, + help="outB", + path_template="{inpB}_out", + ) + + executable = "executable" + + inpA: str = shell.arg( + position=1, + help="inpA", + argstr="", + ) + inpB: str = shell.arg( + position=2, + help="inpB", + argstr="", + ) + outAB: str = shell.arg( + position=-1, + help="outAB", + argstr="-o {outA} {outB}", + readonly=True, + ) + + shelly = Shelly(inpA="inpA", inpB="inpB") # using 
syntax from the outAB field assert ( shelly.cmdline - == f"executable inpA inpB -o {shelly.output_dir / 'inpA_out'} {str(shelly.output_dir / 'inpB_out')}" + == f"executable inpA inpB -o {Path.cwd() / 'inpA_out'} {str(Path.cwd() / 'inpB_out')}" ) # checking if outA and outB in the output fields (outAB should not be) - assert shelly.output_names == ["return_code", "stdout", "stderr", "outA", "outB"] + assert get_output_names(shelly) == [ + "outA", + "outB", + "return_code", + "stderr", + "stdout", + ] # TODO: after deciding how we use requires/templates def test_shell_cmd_inputs_template_4(): - """additional inputs with output_file_template and an additional + """additional inputs with path_template and an additional read-only fields that combine two outputs together in the command line - one output_file_template can't be resolved - no inpB is provided + one path_template can't be resolved - no inpB is provided """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "inpB", - attr.ib( - type=str, - metadata={"position": 2, "help_string": "inpB", "argstr": ""}, - ), - ), - ( - "outAB", - attr.ib( - type=str, - metadata={ - "position": -1, - "help_string": "outAB", - "argstr": "-o {outA} {outB}", - "readonly": True, - }, - ), - ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "help_string": "outA", - "output_file_template": "{inpA}_out", - }, - ), - ), - ( - "outB", - attr.ib( - type=str, - metadata={ - "help_string": "outB", - "output_file_template": "{inpB}_out", - }, - ), - ), - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA" - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + outA: File = shell.outarg( + argstr=None, + help="outA", + path_template="{inpA}_out", + ) + outB: File | None = shell.outarg( + argstr=None, + help="outB", + path_template="{inpB}_out", + ) + + executable = "executable" + + inpA: str = shell.arg( + position=1, + help="inpA", + argstr="", + ) + inpB: str | None = shell.arg(position=2, help="inpB", argstr="", default=None) + outAB: str = shell.arg( + position=-1, + help="outAB", + argstr="-o {outA} {outB}", + readonly=True, + ) + + shelly = Shelly(inpA="inpA") # inpB is not provided so outB not in the command line - assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" - assert shelly.output_names == ["return_code", "stdout", "stderr", "outA", "outB"] + assert shelly.cmdline == f"executable inpA -o {Path.cwd() / 'inpA_out'}" + assert get_output_names(shelly) == [ + "outA", + "outB", + "return_code", + "stderr", + "stdout", + ] def test_shell_cmd_inputs_template_5_ex(): """checking if the exception is raised for read-only fields when input is set""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "outAB", - attr.ib( - type=str, - metadata={ - "position": -1, - "help_string": "outAB", - "argstr": "-o", - "readonly": True, - }, - ), - ) - ], - bases=(ShellSpec,), - ) - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, outAB="outAB" - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + outAB: str = shell.arg( + position=-1, + help="outAB", + argstr="-o", + readonly=True, + ) + + shelly = Shelly(outAB="outAB") with pytest.raises(Exception) as e: 
shelly.cmdline assert "read only" in str(e.value) def test_shell_cmd_inputs_template_6(): - """additional inputs with output_file_template that has type ty.Union[str, bool] + """additional inputs with path_template that has type ty.Union[str, bool] no default is set, so if nothing is provided as an input, the output is used whenever the template can be formatted (the same way as for templates that has type=str) """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "outA", - attr.ib( - type=ty.Union[str, bool], - metadata={ - "position": 2, - "help_string": "outA", - "argstr": "-o", - "output_file_template": "{inpA}_out", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + outA: File = shell.outarg( + position=2, + help="outA", + argstr="-o", + path_template="{inpA}_out", + ) + + executable = "executable" + + inpA: File = shell.arg( + position=1, + help="inpA", + argstr="", + ) # no input for outA (and no default value), so the output is created whenever the # template can be formatted (the same way as for templates that has type=str) - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA" - ) - assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" + inpA = File.mock("inpA") + shelly = Shelly(inpA=inpA) + + inpA_path = Path.cwd() / "inpA" + outA_path = Path.cwd() / "inpA_out" + assert shelly.cmdline == f"executable {inpA_path} -o {outA_path}" # a string is provided for outA, so this should be used as the outA value - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA", outA="outA" - ) - assert shelly.cmdline == "executable inpA -o outA" + shelly = Shelly(inpA=inpA, outA="outA") + assert shelly.cmdline == f"executable {inpA_path} -o outA" # True is provided for outA, so the formatted template should be used as outA value - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA", outA=True - ) - assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" + shelly = Shelly(inpA=inpA, outA=True) + assert shelly.cmdline == f"executable {inpA_path} -o {outA_path}" # False is provided for outA, so the outA shouldn't be used - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA", outA=False - ) - assert shelly.cmdline == "executable inpA" + shelly = Shelly(inpA=inpA, outA=False) + assert shelly.cmdline == f"executable {inpA_path}" def test_shell_cmd_inputs_template_6a(): - """additional inputs with output_file_template that has type ty.Union[str, bool] + """additional inputs with path_template that has type ty.Union[str, bool] and default is set to False, so if nothing is provided as an input, the output is not used """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "outA", - attr.ib( - type=ty.Union[str, bool], - default=False, - metadata={ - "position": 2, - "help_string": "outA", - "argstr": "-o", - "output_file_template": "{inpA}_out", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + outA: File | None = 
shell.outarg( + position=2, + help="outA", + argstr="-o", + path_template="{inpA}_out", + ) + + executable = "executable" + + inpA: str = shell.arg( + position=1, + help="inpA", + argstr="", + ) # no input for outA, but default is False, so the outA shouldn't be used - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA" - ) + shelly = Shelly(inpA="inpA") assert shelly.cmdline == "executable inpA" # a string is provided for outA, so this should be used as the outA value - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA", outA="outA" - ) + shelly = Shelly(inpA="inpA", outA="outA") assert shelly.cmdline == "executable inpA -o outA" # True is provided for outA, so the formatted template should be used as outA value - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA", outA=True - ) - assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" + shelly = Shelly(inpA="inpA", outA=True) + assert shelly.cmdline == f"executable inpA -o {Path.cwd() / 'inpA_out'}" # False is provided for outA, so the outA shouldn't be used - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA", outA=False - ) + shelly = Shelly(inpA="inpA", outA=False) assert shelly.cmdline == "executable inpA" def test_shell_cmd_inputs_template_7(tmp_path: Path): - """additional inputs uses output_file_template with a suffix (no extension) + """additional inputs uses path_template with a suffix (no extension) no keep_extension is used """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=File, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 2, - "help_string": "outA", - "argstr": "", - "output_file_template": "{inpA}_out", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + outA: File = shell.outarg( + position=2, + help="outA", + argstr="", + path_template="{inpA}_out", + ) + + executable = "executable" + + inpA: File = shell.arg( + position=1, + help="inpA", + argstr="", + ) inpA_file = tmp_path / "a_file.txt" inpA_file.write_text("content") - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA=inpA_file - ) + shelly = Shelly(inpA=inpA_file) # outA should be formatted in a way that that .txt goes to the end assert ( shelly.cmdline - == f"executable {tmp_path / 'a_file.txt'} {shelly.output_dir / 'a_file_out.txt'}" + == f"executable {tmp_path / 'a_file.txt'} {Path.cwd() / 'a_file_out.txt'}" ) def test_shell_cmd_inputs_template_7a(tmp_path: Path): - """additional inputs uses output_file_template with a suffix (no extension) + """additional inputs uses path_template with a suffix (no extension) keep_extension is True (as default) """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=File, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 2, - "help_string": "outA", - "argstr": "", - "keep_extension": True, - "output_file_template": "{inpA}_out", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + outA: File = shell.outarg( + position=2, 
+ help="outA", + argstr="", + keep_extension=True, + path_template="{inpA}_out", + ) + + executable = "executable" + + inpA: File = shell.arg( + position=1, + help="inpA", + argstr="", + ) inpA_file = tmp_path / "a_file.txt" inpA_file.write_text("content") - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA=inpA_file - ) + shelly = Shelly(inpA=inpA_file) # outA should be formatted in a way that that .txt goes to the end assert ( shelly.cmdline - == f"executable {tmp_path / 'a_file.txt'} {shelly.output_dir / 'a_file_out.txt'}" + == f"executable {tmp_path / 'a_file.txt'} {Path.cwd() / 'a_file_out.txt'}" ) def test_shell_cmd_inputs_template_7b(tmp_path: Path): - """additional inputs uses output_file_template with a suffix (no extension) + """additional inputs uses path_template with a suffix (no extension) keep extension is False (so the extension is removed when creating the output) """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=File, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 2, - "help_string": "outA", - "argstr": "", - "keep_extension": False, - "output_file_template": "{inpA}_out", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + outA: File = shell.outarg( + position=2, + help="outA", + argstr="", + keep_extension=False, + path_template="{inpA}_out", + ) + + executable = "executable" + + inpA: File = shell.arg( + position=1, + help="inpA", + argstr="", + ) inpA_file = tmp_path / "a_file.txt" inpA_file.write_text("content") - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA=inpA_file - ) + shelly = Shelly(inpA=inpA_file) # outA should be formatted in a way that that .txt goes to the end assert ( shelly.cmdline - == f"executable {tmp_path / 'a_file.txt'} {shelly.output_dir / 'a_file_out'}" + == f"executable {tmp_path / 'a_file.txt'} {Path.cwd() / 'a_file_out'}" ) def test_shell_cmd_inputs_template_8(tmp_path: Path): - """additional inputs uses output_file_template with a suffix and an extension""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=File, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 2, - "help_string": "outA", - "argstr": "", - "output_file_template": "{inpA}_out.txt", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + """additional inputs uses path_template with a suffix and an extension""" + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + outA: File = shell.outarg( + position=2, + help="outA", + argstr="", + path_template="{inpA}_out.txt", + ) + + executable = "executable" + + inpA: File = shell.arg( + position=1, + help="inpA", + argstr="", + ) inpA_file = tmp_path / "a_file.t" inpA_file.write_text("content") - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA=inpA_file - ) + shelly = Shelly(inpA=inpA_file) # outA should be formatted in a way that inpA extension is removed and the template extension is used assert ( shelly.cmdline - == f"executable {tmp_path / 'a_file.t'} {shelly.output_dir / 'a_file_out.txt'}" + == f"executable {tmp_path / 'a_file.t'} {Path.cwd() / 'a_file_out.txt'}" ) def 
test_shell_cmd_inputs_template_9(tmp_path: Path): - """additional inputs, one uses output_file_template with two fields: + """additional inputs, one uses path_template with two fields: one File and one ints - the output should be recreated from the template """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=File, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "inpInt", - attr.ib( - type=int, - metadata={ - "position": 2, - "help_string": "inp int", - "argstr": "-i", - "mandatory": True, - }, - ), - ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 3, - "help_string": "outA", - "argstr": "-o", - "output_file_template": "{inpA}_{inpInt}_out.txt", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + outA: File = shell.outarg( + position=3, + help="outA", + argstr="-o", + path_template="{inpA}_{inpInt}_out.txt", + ) + + executable = "executable" + + inpA: File = shell.arg( + position=1, + help="inpA", + argstr="", + ) + inpInt: int = shell.arg( + position=2, + help="inp int", + argstr="-i", + ) inpA_file = tmp_path / "inpA.t" inpA_file.write_text("content") - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA=inpA_file, inpInt=3 - ) + shelly = Shelly(inpA=inpA_file, inpInt=3) assert ( shelly.cmdline - == f"executable {tmp_path / 'inpA.t'} -i 3 -o {shelly.output_dir / 'inpA_3_out.txt'}" + == f"executable {tmp_path / 'inpA.t'} -i 3 -o {Path.cwd() / 'inpA_3_out.txt'}" ) # checking if outA in the output fields - assert shelly.output_names == ["return_code", "stdout", "stderr", "outA"] + assert get_output_names(shelly) == ["outA", "return_code", "stderr", "stdout"] def test_shell_cmd_inputs_template_9a(tmp_path: Path): - """additional inputs, one uses output_file_template with two fields: + """additional inputs, one uses path_template with two fields: one file and one string without extension - should be fine """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=File, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "inpStr", - attr.ib( - type=str, - metadata={ - "position": 2, - "help_string": "inp str", - "argstr": "-i", - "mandatory": True, - }, - ), - ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 3, - "help_string": "outA", - "argstr": "-o", - "output_file_template": "{inpA}_{inpStr}_out.txt", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + outA: File = shell.outarg( + position=3, + help="outA", + argstr="-o", + path_template="{inpA}_{inpStr}_out.txt", + ) + + executable = "executable" + + inpA: File = shell.arg( + position=1, + help="inpA", + argstr="", + ) + inpStr: str = shell.arg( + position=2, + help="inp str", + argstr="-i", + ) inpA_file = tmp_path / "inpA.t" inpA_file.write_text("content") - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA=inpA_file, inpStr="hola" - ) + shelly = Shelly(inpA=inpA_file, inpStr="hola") assert ( shelly.cmdline - == f"executable {tmp_path / 'inpA.t'} -i hola -o {shelly.output_dir / 'inpA_hola_out.txt'}" + == f"executable {tmp_path / 'inpA.t'} -i hola -o {Path.cwd() / 'inpA_hola_out.txt'}" ) # checking if outA in the output fields - assert shelly.output_names == 
["return_code", "stdout", "stderr", "outA"] + assert get_output_names(shelly) == ["outA", "return_code", "stderr", "stdout"] def test_shell_cmd_inputs_template_9b_err(tmp_path: Path): - """output_file_template with two fields that are both Files, + """path_template with two fields that are both Files, an exception should be raised """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=File, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "inpFile", - attr.ib( - type=File, - metadata={ - "position": 2, - "help_string": "inp file", - "argstr": "-i", - "mandatory": True, - }, - ), - ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 3, - "help_string": "outA", - "argstr": "-o", - "output_file_template": "{inpA}_{inpFile}_out.txt", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + + outA: File = shell.outarg( + position=3, + help="outA", + argstr="-o", + path_template="{inpA}_{inpFile}_out.txt", + ) + + executable = "executable" + + inpA: File = shell.arg( + position=1, + help="inpA", + argstr="", + ) + inpFile: File = shell.arg( + position=2, + help="inp file", + argstr="-i", + ) inpA_file = tmp_path / "inpA.t" inpA_file.write_text("content") @@ -1613,9 +1093,7 @@ def test_shell_cmd_inputs_template_9b_err(tmp_path: Path): inpFile_file = tmp_path / "inpFile.t" inpFile_file.write_text("content") - shelly = ShellCommandTask( - executable="executable", - input_spec=my_input_spec, + shelly = Shelly( inpA=inpA_file, inpFile=inpFile_file, ) @@ -1625,58 +1103,38 @@ def test_shell_cmd_inputs_template_9b_err(tmp_path: Path): def test_shell_cmd_inputs_template_9c_err(tmp_path: Path): - """output_file_template with two fields: a file and a string with extension, + """path_template with two fields: a file and a string with extension, that should be used as an additional file and the exception should be raised """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=File, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "inpStr", - attr.ib( - type=str, - metadata={ - "position": 2, - "help_string": "inp str with extension", - "argstr": "-i", - "mandatory": True, - }, - ), - ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 3, - "help_string": "outA", - "argstr": "-o", - "output_file_template": "{inpA}_{inpStr}_out.txt", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + + outA: File = shell.outarg( + position=3, + help="outA", + argstr="-o", + path_template="{inpA}_{inpStr}_out.txt", + ) + + executable = "executable" + + inpA: File = shell.arg( + position=1, + help="inpA", + argstr="", + ) + inpStr: Path = shell.arg( + position=2, + help="inp str with extension", + argstr="-i", + ) inpA_file = tmp_path / "inpA.t" inpA_file.write_text("content") - shelly = ShellCommandTask( - executable="executable", - input_spec=my_input_spec, + shelly = Shelly( inpA=inpA_file, inpStr="hola.txt", ) @@ -1687,102 +1145,69 @@ def test_shell_cmd_inputs_template_9c_err(tmp_path: Path): def test_shell_cmd_inputs_template_10(): - """output_file_template uses a float field with formatting""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=float, - metadata={ - "position": 1, - 
"help_string": "inpA", - "argstr": "{inpA:.1f}", - "mandatory": True, - }, - ), - ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 2, - "help_string": "outA", - "argstr": "-o", - "output_file_template": "file_{inpA:.1f}_out", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + """path_template uses a float field with formatting""" - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA=3.3456 - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + + outA: File = shell.outarg( + position=2, + help="outA", + argstr="-o", + path_template="file_{inpA:.1f}_out", + ) + + executable = "executable" + + inpA: float = shell.arg( + position=1, + help="inpA", + argstr="{inpA:.1f}", + ) + + shelly = Shelly(inpA=3.3456) # outA has argstr in the metadata fields, so it's a part of the command line # the full path will be use din the command line - assert shelly.cmdline == f"executable 3.3 -o {shelly.output_dir / 'file_3.3_out'}" + assert shelly.cmdline == f"executable 3.3 -o {Path.cwd() / 'file_3.3_out'}" # checking if outA in the output fields - assert shelly.output_names == ["return_code", "stdout", "stderr", "outA"] + assert get_output_names(shelly) == ["outA", "return_code", "stderr", "stdout"] def test_shell_cmd_inputs_template_requires_1(): - """Given an input specification with a templated output file subject to required fields, + """Given an input definition with a templated output file subject to required fields, ensure the field is set only when all requirements are met.""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "in_file", - attr.ib( - type=str, - metadata={ - "help_string": "input file", - "mandatory": True, - "argstr": "", - }, - ), - ), - ( - "with_tpl", - attr.ib( - type=bool, - metadata={"help_string": "enable template"}, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "help_string": "output file", - "argstr": "--tpl", - "output_file_template": "tpl.{in_file}", - "requires": {"with_tpl"}, - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + + out_file: File | None = shell.outarg( + help="output file", + argstr="--tpl", + path_template="tpl.{in_file}", + requires={"with_tpl"}, + ) + + executable = "executable" + + in_file: str = shell.arg( + help="input file", + argstr="", + ) + with_tpl: bool = shell.arg(help="enable template", default=False) # When requirements are not met. - shelly = ShellCommandTask( - executable="cmd", input_spec=my_input_spec, in_file="in.file" - ) + shelly = Shelly(executable="cmd", in_file="in.file") assert "--tpl" not in shelly.cmdline # When requirements are met. 
- shelly.inputs.with_tpl = True + shelly.with_tpl = True assert "tpl.in.file" in shelly.cmdline def test_shell_cmd_inputs_template_function_1(): - """one input field uses output_file_template that is a simple function + """one input field uses path_template that is a simple function this can be easily done by simple template as in test_shell_cmd_inputs_template_1 """ @@ -1790,46 +1215,32 @@ def test_shell_cmd_inputs_template_function_1(): def template_fun(inputs): return "{inpA}_out" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 2, - "help_string": "outA", - "argstr": "-o", - "output_file_template": template_fun, - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA" - ) + outA: File = shell.outarg( + position=2, + help="outA", + argstr="-o", + path_template=template_fun, + ) + + executable = "executable" - assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" + inpA: str = shell.arg( + position=1, + help="inpA", + argstr="", + ) + + shelly = Shelly(inpA="inpA") + + assert shelly.cmdline == f"executable inpA -o {Path.cwd() / 'inpA_out'}" def test_shell_cmd_inputs_template_function_2(): - """one input field uses output_file_template that is a function, + """one input field uses path_template that is a function, depending on a value of an input it returns different template """ @@ -1840,104 +1251,35 @@ def template_fun(inputs): else: return "{inpA}_odd" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "inpB", - attr.ib( - type=int, - metadata={ - "help_string": "inpB", - "mandatory": True, - }, - ), - ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 2, - "help_string": "outA", - "argstr": "-o", - "output_file_template": template_fun, - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): - shelly = ShellCommandTask( - executable="executable", - input_spec=my_input_spec, - inpA="inpA", - inpB=1, - ) + outA: File = shell.outarg( + position=2, + help="outA", + argstr="-o", + path_template=template_fun, + ) - assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_odd'}" + executable = "executable" + inpA: str = shell.arg( + position=1, + help="inpA", + argstr="", + ) + inpB: int = shell.arg( + help="inpB", + argstr=None, + ) -def test_shell_cmd_inputs_template_1_st(): - """additional inputs, one uses output_file_template (and argstr) - testing cmdline when splitter defined - """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help_string": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 2, - "help_string": "outA", - "argstr": "-o", - "output_file_template": "{inpA}_out", - }, - ), - ), - ], - bases=(ShellSpec,), + shelly = Shelly( + inpA="inpA", + inpB=1, ) - inpA = ["inpA_1", "inpA_2"] - ShellCommandTask( - name="f", - executable="executable", - 
input_spec=my_input_spec, - ).split("inpA", inpA=inpA) - - # cmdline_list = shelly.cmdline - # assert len(cmdline_list) == 2 - # for i in range(2): - # path_out = Path(shelly.output_dir[i]) / f"{inpA[i]}_out" - # assert cmdline_list[i] == f"executable {inpA[i]} -o {path_out}" + assert shelly.cmdline == f"executable inpA -o {Path.cwd() / 'inpA_odd'}" # TODO: after deciding how we use requires/templates @@ -1945,252 +1287,164 @@ def test_shell_cmd_inputs_denoise_image( tmp_path, ): """example from #279""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "image_dimensionality", - attr.ib( - type=int, - metadata={ - "help_string": """ + + @shell.define + class DenoiseImage(ShellDef["DenoiseImage.Outputs"]): + class Outputs(ShellOutputs): + + correctedImage: File = shell.outarg( + help=""" + The output consists of the noise corrected version of the input image. + Optionally, one can also output the estimated noise image. """, + path_template="{inputImageFilename}_out", + argstr=None, + ) + noiseImage: File | None = shell.outarg( + help=""" + The output consists of the noise corrected version of the input image. + Optionally, one can also output the estimated noise image. """, + path_template="{inputImageFilename}_noise", + argstr=None, + ) + + executable = "executable" + + image_dimensionality: int | None = shell.arg( + help=""" 2/3/4 This option forces the image to be treated as a specified-dimensional image. If not specified, the program tries to infer the dimensionality from the input image. """, - "allowed_values": [2, 3, 4], - "argstr": "-d", - }, - ), - ), - ( - "inputImageFilename", - attr.ib( - type=File, - metadata={ - "help_string": "A scalar image is expected as input for noise correction.", - "argstr": "-i", - "mandatory": True, - }, - ), - ), - ( - "noise_model", - attr.ib( - type=str, - metadata={ - "help_string": """ - Rician/(Gaussian) - Employ a Rician or Gaussian noise model. - """, - "allowed_values": ["Rician", "Gaussian"], - "argstr": "-n", - }, - ), - ), - ( - "maskImageFilename", - attr.ib( - type=str, - metadata={ - "help_string": "If a mask image is specified, denoising is only performed in the mask region.", - "argstr": "-x", - }, - ), - ), - ( - "shrink_factor", - attr.ib( - type=int, - default=1, - metadata={ - "help_string": """ - (1)/2/3/... - Running noise correction on large images can be time consuming. - To lessen computation time, the input image can be resampled. - The shrink factor, specified as a single integer, describes this - resampling. Shrink factor = 1 is the default. - """, - "argstr": "-s", - }, - ), - ), - ( - "patch_radius", - attr.ib( - type=int, - default=1, - metadata={ - "help_string": "Patch radius. Default = 1x1x1", - "argstr": "-p", - }, - ), - ), - ( - "search_radius", - attr.ib( - type=int, - default=2, - metadata={ - "help_string": "Search radius. Default = 2x2x2.", - "argstr": "-r", - }, - ), - ), - ( - "correctedImage", - attr.ib( - type=str, - metadata={ - "help_string": """ - The output consists of the noise corrected version of the input image. - Optionally, one can also output the estimated noise image. - """, - "output_file_template": "{inputImageFilename}_out", - }, - ), - ), - ( - "noiseImage", - attr.ib( - type=ty.Union[str, bool], - default=False, - metadata={ - "help_string": """ - The output consists of the noise corrected version of the input image. - Optionally, one can also output the estimated noise image. 
- """, - "output_file_template": "{inputImageFilename}_noise", - }, - ), - ), - ( - "output", - attr.ib( - type=str, - metadata={ - "help_string": "Combined output", - "argstr": "-o [{correctedImage}, {noiseImage}]", - "position": -1, - "readonly": True, - }, - ), - ), - ( - "version", - attr.ib( - type=bool, - default=False, - metadata={ - "help_string": "Get Version Information.", - "argstr": "--version", - }, - ), - ), - ( - "verbose", - attr.ib( - type=int, - default=0, - metadata={"help_string": "(0)/1. Verbose output. ", "argstr": "-v"}, - ), - ), - ( - "help_short", - attr.ib( - type=bool, - default=False, - metadata={ - "help_string": "Print the help menu (short version)", - "argstr": "-h", - }, - ), - ), - ( - "help", - attr.ib( - type=int, - metadata={ - "help_string": "Print the help menu.", - "argstr": "--help", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + allowed_values=[2, 3, 4, None], + default=None, + argstr="-d", + position=1, + ) + inputImageFilename: File = shell.arg( + help="A scalar image is expected as input for noise correction.", + argstr="-i", + position=2, + ) + noise_model: str | None = shell.arg( + default=None, + help=""" Rician/(Gaussian) Employ a Rician or Gaussian noise model. """, + allowed_values=["Rician", "Gaussian"], + argstr="-n", + ) + maskImageFilename: str | None = shell.arg( + default=None, + help="If a mask image is specified, denoising is only performed in the mask region.", + argstr="-x", + ) + shrink_factor: int = shell.arg( + default=1, + help=""" + (1)/2/3/... + Running noise correction on large images can be time consuming. + To lessen computation time, the input image can be resampled. + The shrink factor, specified as a single integer, describes this + resampling. Shrink factor = 1 is the default. """, + argstr="-s", + position=3, + ) + patch_radius: int = shell.arg( + default=1, help="Patch radius. Default = 1x1x1", argstr="-p", position=4 + ) + search_radius: int = shell.arg( + default=2, help="Search radius. Default = 2x2x2.", argstr="-r", position=5 + ) + output: str = shell.arg( + help="Combined output", + argstr="-o [{correctedImage}, {noiseImage}]", + position=-1, + readonly=True, + ) + version: bool = shell.arg( + default=False, + help="Get Version Information.", + argstr="--version", + ) + verbose: int = shell.arg(default=0, help="(0)/1. Verbose output. 
", argstr="-v") + help_short: bool = shell.arg( + default=False, + help="Print the help menu (short version)", + argstr="-h", + ) + help: int | None = shell.arg( + default=None, + help="Print the help menu.", + argstr="--help", + ) my_input_file = tmp_path / "a_file.ext" my_input_file.write_text("content") # no input provided - shelly = ShellCommandTask(executable="DenoiseImage", input_spec=my_input_spec) + denoise_image = DenoiseImage( + executable="DenoiseImage", + ) with pytest.raises(Exception) as e: - shelly.cmdline - assert "mandatory" in str(e.value) + denoise_image.cmdline + assert "mandatory" in str(e.value).lower() # input file name, noiseImage is not set, so using default value False - shelly = ShellCommandTask( + denoise_image = DenoiseImage( executable="DenoiseImage", inputImageFilename=my_input_file, - input_spec=my_input_spec, ) assert ( - shelly.cmdline - == f"DenoiseImage -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 -o [{shelly.output_dir / 'a_file_out.ext'}]" + denoise_image.cmdline + == f"DenoiseImage -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 -o [{Path.cwd() / 'a_file_out.ext'}]" ) # input file name, noiseImage is set to True, so template is used in the output - shelly = ShellCommandTask( + denoise_image = DenoiseImage( executable="DenoiseImage", inputImageFilename=my_input_file, - input_spec=my_input_spec, noiseImage=True, ) assert ( - shelly.cmdline == f"DenoiseImage -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 " - f"-o [{shelly.output_dir / 'a_file_out.ext'}, {str(shelly.output_dir / 'a_file_noise.ext')}]" + denoise_image.cmdline + == f"DenoiseImage -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 " + f"-o [{Path.cwd() / 'a_file_out.ext'}, {str(Path.cwd() / 'a_file_noise.ext')}]" ) # input file name and help_short - shelly = ShellCommandTask( + denoise_image = DenoiseImage( executable="DenoiseImage", inputImageFilename=my_input_file, help_short=True, - input_spec=my_input_spec, ) assert ( - shelly.cmdline - == f"DenoiseImage -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 -h -o [{shelly.output_dir / 'a_file_out.ext'}]" + denoise_image.cmdline + == f"DenoiseImage -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 -h -o [{Path.cwd() / 'a_file_out.ext'}]" ) - assert shelly.output_names == [ - "return_code", - "stdout", - "stderr", + assert get_output_names(denoise_image) == [ "correctedImage", "noiseImage", + "return_code", + "stderr", + "stdout", ] # adding image_dimensionality that has allowed_values [2, 3, 4] - shelly = ShellCommandTask( + denoise_image = DenoiseImage( executable="DenoiseImage", inputImageFilename=my_input_file, - input_spec=my_input_spec, image_dimensionality=2, ) assert ( - shelly.cmdline - == f"DenoiseImage -d 2 -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 -o [{shelly.output_dir / 'a_file_out.ext'}]" + denoise_image.cmdline + == f"DenoiseImage -d 2 -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 -o [{Path.cwd() / 'a_file_out.ext'}]" ) # adding image_dimensionality that has allowed_values [2, 3, 4] and providing 5 - exception should be raised with pytest.raises(ValueError) as excinfo: - shelly = ShellCommandTask( + denoise_image = DenoiseImage( executable="DenoiseImage", inputImageFilename=my_input_file, - input_spec=my_input_spec, image_dimensionality=5, ) assert "value of image_dimensionality" in str(excinfo.value) @@ -2199,99 +1453,75 @@ def test_shell_cmd_inputs_denoise_image( # tests with XOR in input metadata -class SimpleTaskXor(ShellCommandTask): - input_fields = [ - ( - "input_1", - str, - { - "help_string": "help", - "mandatory": True, - "xor": ("input_1", 
"input_2", "input_3"), - }, - ), - ( - "input_2", - bool, - { - "help_string": "help", - "mandatory": True, - "argstr": "--i2", - "xor": ("input_1", "input_2", "input_3"), - }, - ), - ( - "input_3", - bool, - { - "help_string": "help", - "mandatory": True, - "xor": ("input_1", "input_2", "input_3"), - }, - ), - ] - task_input_spec = SpecInfo(name="Input", fields=input_fields, bases=(ShellSpec,)) - task_output_fields = [] - task_output_spec = SpecInfo( - name="Output", fields=task_output_fields, bases=(ShellOutSpec,) +@shell.define(xor=("input_1", "input_2", "input_3")) +class SimpleXor(ShellDef["SimpleTaskXor.Outputs"]): + + input_1: str | None = shell.arg( + default=None, + help="help", + ) + input_2: bool | None = shell.arg( + default=None, + help="help", + argstr="--i2", + ) + input_3: bool | None = shell.arg( + default=None, + help="help", ) - input_spec = task_input_spec - output_spec = task_output_spec + @shell.outputs + class Outputs(ShellOutputs): + pass + executable = "cmd" def test_task_inputs_mandatory_with_xOR_one_mandatory_is_OK(): - """input spec with mandatory inputs""" - task = SimpleTaskXor() - task.inputs.input_1 = "Input1" - task.inputs.input_2 = attr.NOTHING - task.inputs.check_fields_input_spec() + """input definition with mandatory inputs""" + simple_xor = SimpleXor() + simple_xor.input_1 = "Input1" + simple_xor._check_rules() def test_task_inputs_mandatory_with_xOR_one_mandatory_out_3_is_OK(): - """input spec with mandatory inputs""" - task = SimpleTaskXor() - task.inputs.input_1 = attr.NOTHING - task.inputs.input_2 = attr.NOTHING - task.inputs.input_3 = True - task.inputs.check_fields_input_spec() + """input definition with mandatory inputs""" + simple_xor = SimpleXor() + simple_xor.input_3 = True + simple_xor._check_rules() def test_task_inputs_mandatory_with_xOR_zero_mandatory_raises_error(): - """input spec with mandatory inputs""" - task = SimpleTaskXor() - task.inputs.input_1 = attr.NOTHING - task.inputs.input_2 = attr.NOTHING - with pytest.raises(Exception) as excinfo: - task.inputs.check_fields_input_spec() - assert "input_1 is mandatory" in str(excinfo.value) - assert "no alternative provided by ['input_2', 'input_3']" in str(excinfo.value) - assert excinfo.type is AttributeError + """input definition with mandatory inputs""" + simple_xor = SimpleXor() + simple_xor.input_2 = False + with pytest.raises( + ValueError, match="At least one of the mutually exclusive fields should be set:" + ): + simple_xor._check_rules() def test_task_inputs_mandatory_with_xOR_two_mandatories_raises_error(): - """input spec with mandatory inputs""" - task = SimpleTaskXor() - task.inputs.input_1 = "Input1" - task.inputs.input_2 = True + """input definition with mandatory inputs""" + simple_xor = SimpleXor() + simple_xor.input_1 = "Input1" + simple_xor.input_2 = True - with pytest.raises(Exception) as excinfo: - task.inputs.check_fields_input_spec() - assert "input_1 is mutually exclusive with ['input_2']" in str(excinfo.value) - assert excinfo.type is AttributeError + with pytest.raises( + ValueError, match="Mutually exclusive fields .* are set together" + ): + simple_xor._check_rules() def test_task_inputs_mandatory_with_xOR_3_mandatories_raises_error(): - """input spec with mandatory inputs""" - task = SimpleTaskXor() - task.inputs.input_1 = "Input1" - task.inputs.input_2 = True - task.inputs.input_3 = False - - with pytest.raises(Exception) as excinfo: - task.inputs.check_fields_input_spec() - assert "input_1 is mutually exclusive with ['input_2', 'input_3']" in str( - 
excinfo.value - ) - assert excinfo.type is AttributeError + """input definition with mandatory inputs""" + simple_xor = SimpleXor() + simple_xor.input_1 = "Input1" + simple_xor.input_2 = True + simple_xor.input_3 = False + + with pytest.raises( + ValueError, + match=r".*Mutually exclusive fields \(input_1='Input1', input_2=True\) are set together", + ): + simple_xor._check_rules() diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index 791575adc1..0d12b6fee7 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -1,13 +1,11 @@ import shutil import subprocess as sp import pytest -import attr - -from ..task import ShellCommandTask -from ..submitter import Submitter -from ..core import Workflow -from ..specs import ShellOutSpec, SpecInfo, File, ShellSpec -from ..environments import Singularity +from pydra.engine.submitter import Submitter +from pydra.engine.specs import ShellDef, ShellOutputs +from pydra.design import shell, workflow +from fileformats.generic import File +from pydra.engine.environments import Singularity need_docker = pytest.mark.skipif( @@ -30,19 +28,11 @@ def test_singularity_1_nosubm(tmp_path): """ cmd = "pwd" image = "docker://alpine" - singu = ShellCommandTask( - name="singu", - executable=cmd, - environment=Singularity(image=image), - cache_dir=tmp_path, - ) - assert singu.environment.image == "docker://alpine" - assert isinstance(singu.environment, Singularity) - assert singu.cmdline == cmd - - res = singu() - assert "/mnt/pydra" in res.output.stdout - assert res.output.return_code == 0 + Singu = shell.define(cmd) + singu = Singu() + outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) + assert "/mnt/pydra" in outputs.stdout + assert outputs.return_code == 0 @need_singularity @@ -52,17 +42,16 @@ def test_singularity_2_nosubm(tmp_path): """ cmd = ["echo", "hail", "pydra"] image = "docker://alpine" - singu = ShellCommandTask( - name="singu", - executable=cmd, + Singu = shell.define(" ".join(cmd)) + singu = Singu() + assert singu.cmdline == " ".join(cmd) + + outputs = singu( environment=Singularity(image=image), cache_dir=tmp_path, ) - assert singu.cmdline == " ".join(cmd) - - res = singu() - assert res.output.stdout.strip() == " ".join(cmd[1:]) - assert res.output.return_code == 0 + assert outputs.stdout.strip() == " ".join(cmd[1:]) + assert outputs.return_code == 0 @need_singularity @@ -72,20 +61,18 @@ def test_singularity_2(plugin, tmp_path): """ cmd = ["echo", "hail", "pydra"] image = "docker://alpine" + Singu = shell.define(" ".join(cmd)) + singu = Singu() - singu = ShellCommandTask( - name="singu", - executable=cmd, - environment=Singularity(image=image), - cache_dir=tmp_path, - ) assert singu.cmdline == " ".join(cmd) - with Submitter(plugin=plugin) as sub: - singu(submitter=sub) - res = singu.result() - assert res.output.stdout.strip() == " ".join(cmd[1:]) - assert res.output.return_code == 0 + with Submitter( + worker=plugin, environment=Singularity(image=image), cache_dir=tmp_path + ) as sub: + res = sub(singu) + assert not res.errored, "\n".join(res.errors["error message"]) + assert res.outputs.stdout.strip() == " ".join(cmd[1:]) + assert res.outputs.return_code == 0 @need_singularity @@ -97,20 +84,18 @@ def test_singularity_2a(plugin, tmp_path): cmd_args = ["hail", "pydra"] # separate command into exec + args image = "docker://alpine" - singu = ShellCommandTask( - name="singu", - executable=cmd_exec, - args=cmd_args, - 
environment=Singularity(image=image), - cache_dir=tmp_path, - ) + Singu = shell.define(cmd_exec) + singu = Singu(additional_args=cmd_args) assert singu.cmdline == f"{cmd_exec} {' '.join(cmd_args)}" - with Submitter(plugin=plugin) as sub: - singu(submitter=sub) - res = singu.result() - assert res.output.stdout.strip() == " ".join(cmd_args) - assert res.output.return_code == 0 + with Submitter( + worker="debug", environment=Singularity(image=image), cache_dir=tmp_path + ) as sub: + res = sub(singu) + + assert not res.errored, "\n".join(res.errors["error message"]) + assert res.outputs.stdout.strip() == " ".join(cmd_args) + assert res.outputs.return_code == 0 # tests with State @@ -121,17 +106,20 @@ def test_singularity_st_1(plugin, tmp_path): """commands without arguments in container splitter = executable """ - cmd = ["pwd", "ls"] + cmd = ["whoami", "pwd", "ls"] image = "docker://alpine" - singu = ShellCommandTask( - name="singu", environment=Singularity(image=image), cache_dir=tmp_path - ).split("executable", executable=cmd) - assert singu.state.splitter == "singu.executable" + Singu = shell.define("dummy") + singu = Singu().split("executable", executable=cmd) - res = singu(plugin=plugin) - assert "/mnt/pydra" in res[0].output.stdout - assert res[1].output.stdout == "" - assert res[0].output.return_code == res[1].output.return_code == 0 + outputs = singu( + plugin=plugin, + environment=Singularity(image=image, xargs=["--fakeroot"]), + cache_dir=tmp_path, + ) + assert outputs.stdout[0].strip() == "root" + assert "/mnt/pydra" in outputs.stdout[1] + assert outputs.stdout[2].strip() == "_task.pklz" + assert outputs.return_code == [0, 0, 0] @need_singularity @@ -145,17 +133,16 @@ def test_singularity_st_2(tmp_path, n): """splitter over args (checking bigger splitters if slurm available)""" args_n = list(range(n)) image = "docker://alpine" - singu = ShellCommandTask( - name="singu", - executable="echo", - environment=Singularity(image=image), - cache_dir=tmp_path, - ).split("args", args=args_n) - assert singu.state.splitter == "singu.args" - res = singu(plugin="slurm") - assert "1" in res[1].output.stdout - assert str(n - 1) in res[-1].output.stdout - assert res[0].output.return_code == res[1].output.return_code == 0 + Singu = shell.define("echo") + singu = Singu().split("args", args=args_n) + with Submitter( + plugin="slurm", environment=Singularity(image=image), cache_dir=tmp_path + ) as sub: + res = sub(singu) + + assert "1" in res.outputs.stdout[1] + assert str(n - 1) in res.outputs.stdout[-1] + assert res.outputs.return_code[0] == res.outputs.return_code[1] == 0 # tests with customized output_spec @@ -170,25 +157,20 @@ def test_singularity_outputspec_1(plugin, tmp_path): cmd = ["touch", "newfile_tmp.txt"] image = "docker://alpine" - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", File, "newfile_tmp.txt")], - bases=(ShellOutSpec,), - ) - singu = ShellCommandTask( - name="singu", - environment=Singularity(image=image), - executable=cmd, - output_spec=my_output_spec, - cache_dir=tmp_path, + Singu = shell.define( + " ".join(cmd), + outputs=[ + shell.outarg(name="newfile", type=File, path_template="newfile_tmp.txt") + ], ) + singu = Singu() - with Submitter(plugin=plugin) as sub: - singu(submitter=sub) + with Submitter(environment=Singularity(image=image), cache_dir=tmp_path) as sub: + res = sub(singu) - res = singu.result() - assert res.output.stdout == "" - assert res.output.newfile.fspath.exists() + assert not res.errored, "\n".join(res.errors["error message"]) + assert 
res.outputs.stdout == "" + assert res.outputs.newfile.fspath.exists() # tests with customised input_spec @@ -196,7 +178,7 @@ def test_singularity_outputspec_1(plugin, tmp_path): @need_singularity def test_singularity_inputspec_1(plugin, tmp_path): - """a simple customized input spec for singularity task""" + """a simple customized input definition for singularity task""" filename = str((tmp_path / "file_pydra.txt")) with open(filename, "w") as f: f.write("hello from pydra") @@ -204,42 +186,28 @@ def test_singularity_inputspec_1(plugin, tmp_path): cmd = "cat" image = "docker://alpine" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellSpec,), ) - singu = ShellCommandTask( - name="singu", - environment=Singularity(image=image), - executable=cmd, - file=filename, - input_spec=my_input_spec, - strip=True, - cache_dir=tmp_path, - ) + singu = Singu(file=filename) - res = singu() - assert res.output.stdout == "hello from pydra" + outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) + assert outputs.stdout.strip() == "hello from pydra" @need_singularity def test_singularity_inputspec_1a(plugin, tmp_path): - """a simple customized input spec for singularity task + """a simple customized input definition for singularity task a default value is used """ filename = str((tmp_path / "file_pydra.txt")) @@ -249,37 +217,28 @@ def test_singularity_inputspec_1a(plugin, tmp_path): cmd = "cat" image = "docker://alpine" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - default=filename, - metadata={"position": 1, "argstr": "", "help_string": "input file"}, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + default=filename, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellSpec,), - ) - - singu = ShellCommandTask( - name="singu", - environment=Singularity(image=image), - executable=cmd, - input_spec=my_input_spec, - strip=True, - cache_dir=tmp_path, ) + singu = Singu(file=filename) - res = singu() - assert res.output.stdout == "hello from pydra" + outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) + assert outputs.stdout.strip() == "hello from pydra" @need_singularity def test_singularity_inputspec_2(plugin, tmp_path): - """a customized input spec with two fields for singularity task""" + """a customized input definition with two fields for singularity task""" filename_1 = tmp_path / "file_pydra.txt" with open(filename_1, "w") as f: f.write("hello from pydra\n") @@ -291,53 +250,36 @@ def test_singularity_inputspec_2(plugin, tmp_path): cmd = "cat" image = "docker://alpine" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - attr.ib( - type=File, - metadata={ - "position": 1, - "argstr": "", - "help_string": "input file 1", - }, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="file1", + type=File, + position=1, + argstr="", + help="input file 1", ), - ( - "file2", - attr.ib( - type=File, - default=filename_2, - metadata={ - "position": 2, - "argstr": "", - "help_string": "input file 2", - }, - ), + shell.arg( + name="file2", + type=File, + default=filename_2, + position=2, + argstr="", + help="input file 2", ), ], - bases=(ShellSpec,), ) 
- singu = ShellCommandTask( - name="singu", - environment=Singularity(image=image), - executable=cmd, - file1=filename_1, - input_spec=my_input_spec, - strip=True, - cache_dir=tmp_path, - ) + singu = Singu(file1=filename_1) - res = singu() - assert res.output.stdout == "hello from pydra\nhave a nice one" + outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) + assert outputs.stdout == "hello from pydra\nhave a nice one" @need_singularity def test_singularity_inputspec_2a_except(plugin, tmp_path): - """a customized input spec with two fields + """a customized input definition with two fields first one uses a default, and second doesn't - raises a dataclass exception """ filename_1 = tmp_path / "file_pydra.txt" @@ -351,52 +293,35 @@ def test_singularity_inputspec_2a_except(plugin, tmp_path): image = "docker://alpine" # the field with default value can't be before value without default - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - attr.ib( - type=File, - default=filename_1, - metadata={ - "position": 1, - "argstr": "", - "help_string": "input file 1", - }, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="file1", + type=File, + default=filename_1, + position=1, + argstr="", + help="input file 1", ), - ( - "file2", - attr.ib( - type=File, - metadata={ - "position": 2, - "argstr": "", - "help_string": "input file 2", - }, - ), + shell.arg( + name="file2", + type=File, + position=2, + argstr="", + help="input file 2", ), ], - bases=(ShellSpec,), ) - singu = ShellCommandTask( - name="singu", - environment=Singularity(image=image), - executable=cmd, - file2=filename_2, - input_spec=my_input_spec, - strip=True, - cache_dir=tmp_path, - ) - res = singu() - assert res.output.stdout == "hello from pydra\nhave a nice one" + singu = Singu(file2=filename_2) + outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) + assert outputs.stdout == "hello from pydra\nhave a nice one" @need_singularity def test_singularity_inputspec_2a(plugin, tmp_path): - """a customized input spec with two fields + """a customized input definition with two fields first one uses a default value, this is fine even if the second field is not using any defaults """ @@ -411,48 +336,31 @@ def test_singularity_inputspec_2a(plugin, tmp_path): image = "docker://alpine" # if you want set default in the first field you can use default_value in metadata - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - attr.ib( - type=File, - default=filename_1, - metadata={ - "position": 1, - "argstr": "", - "help_string": "input file 1", - }, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="file1", + type=File, + default=filename_1, + position=1, + argstr="", + help="input file 1", ), - ( - "file2", - attr.ib( - type=File, - metadata={ - "position": 2, - "argstr": "", - "help_string": "input file 2", - }, - ), + shell.arg( + name="file2", + type=File, + position=2, + argstr="", + help="input file 2", ), ], - bases=(ShellSpec,), ) - singu = ShellCommandTask( - name="singu", - environment=Singularity(image=image), - executable=cmd, - file2=filename_2, - input_spec=my_input_spec, - strip=True, - cache_dir=tmp_path, - ) + singu = Singu(file2=filename_2) - res = singu() - assert res.output.stdout == "hello from pydra\nhave a nice one" + outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) + assert outputs.stdout == "hello from pydra\nhave a nice one" @need_singularity @@ -465,54 +373,34 @@ def 
test_singularity_cmd_inputspec_copyfile_1(plugin, tmp_path): with open(file, "w") as f: f.write("hello from pydra\n") - cmd = ["sed", "-is", "s/hello/hi/"] image = "docker://alpine" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=File, - metadata={ - "position": 1, - "argstr": "", - "help_string": "orig file", - "mandatory": True, - "copyfile": True, - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{orig_file}", - "help_string": "output file", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + @shell.define + class Singu(ShellDef["Singu.Outputs"]): - singu = ShellCommandTask( - name="singu", - environment=Singularity(image=image), - executable=cmd, - input_spec=my_input_spec, - orig_file=str(file), - cache_dir=tmp_path, - ) + executable = ["sed", "-is", "s/hello/hi/"] + + orig_file: File = shell.arg( + position=1, + argstr="", + help="orig file", + copy_mode=File.CopyMode.copy, + ) - res = singu() - assert res.output.stdout == "" - assert res.output.out_file.fspath.exists() + class Outputs(ShellOutputs): + out_file: File = shell.outarg( + path_template="{orig_file}.txt", # FIXME: Shouldn't have to specify the extension + help="output file", + ) + + singu = Singu(orig_file=file) + + outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.out_file.fspath.exists() # the file is copied, and than it is changed in place - assert res.output.out_file.fspath.parent == singu.output_dir - with open(res.output.out_file) as f: + assert outputs.out_file.fspath.parent.parent == tmp_path + with open(outputs.out_file) as f: assert "hi from pydra\n" == f.read() # the original file is unchanged with open(file) as f: @@ -521,7 +409,7 @@ def test_singularity_cmd_inputspec_copyfile_1(plugin, tmp_path): @need_singularity def test_singularity_inputspec_state_1(tmp_path): - """a customised input spec for a singularity file with a splitter, + """a customised input definition for a singularity file with a splitter, splitter is on files """ filename_1 = tmp_path / "file_pydra.txt" @@ -535,43 +423,30 @@ def test_singularity_inputspec_state_1(tmp_path): filename = [str(filename_1), str(filename_2)] image = "docker://alpine" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellSpec,), ) - singu = ShellCommandTask( - name="singu", - environment=Singularity(image=image), - executable=cmd, - input_spec=my_input_spec, - strip=True, - cache_dir=tmp_path, - ).split("file", file=filename) + singu = Singu().split("file", file=filename) - res = singu() - assert res[0].output.stdout == "hello from pydra" - assert res[1].output.stdout == "have a nice one" + outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) + assert outputs.stdout[0].strip() == "hello from pydra" + assert outputs.stdout[1].strip() == "have a nice one" @need_singularity def test_singularity_inputspec_state_1b(plugin, tmp_path): - """a customised input spec for a singularity file with a splitter, - files from the input spec have the same path in the local os and the container, + """a customised input definition for a singularity file with a splitter, + files from the input definition have the same 
path in the local os and the container, so hash is calculated and the test works fine """ file_1 = tmp_path / "file_pydra.txt" @@ -585,42 +460,29 @@ def test_singularity_inputspec_state_1b(plugin, tmp_path): filename = [str(file_1), str(file_2)] image = "docker://alpine" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellSpec,), ) - singu = ShellCommandTask( - name="singu", - environment=Singularity(image=image), - executable=cmd, - input_spec=my_input_spec, - strip=True, - cache_dir=tmp_path, - ).split("file", file=filename) + singu = Singu().split("file", file=filename) - res = singu() - assert res[0].output.stdout == "hello from pydra" - assert res[1].output.stdout == "have a nice one" + outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) + assert outputs.stdout[0].strip() == "hello from pydra" + assert outputs.stdout[1].strip() == "have a nice one" @need_singularity def test_singularity_wf_inputspec_1(plugin, tmp_path): - """a customized input spec for workflow with singularity tasks""" + """a customized input definition for workflow with singularity tasks""" filename = tmp_path / "file_pydra.txt" with open(filename, "w") as f: f.write("hello from pydra") @@ -628,51 +490,35 @@ def test_singularity_wf_inputspec_1(plugin, tmp_path): cmd = "cat" image = "docker://alpine" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellSpec,), - ) - - wf = Workflow(name="wf", input_spec=["cmd", "file"], cache_dir=tmp_path) - wf.inputs.cmd = cmd - wf.inputs.file = filename - - singu = ShellCommandTask( - name="singu", - environment=Singularity(image=image), - executable=wf.lzin.cmd, - file=wf.lzin.file, - input_spec=my_input_spec, - strip=True, ) - wf.add(singu) - wf.set_output([("out", wf.singu.lzout.stdout)]) + @workflow.define + def Workflow(cmd: str, file: File) -> str: + singu = workflow.add( + Singu(executable=cmd, file=file), environment=Singularity(image=image) + ) + return singu.stdout - with Submitter(plugin="serial") as sub: - wf(submitter=sub) + with Submitter(cache_dir=tmp_path) as sub: + res = sub(Workflow(cmd=cmd, file=filename)) - res = wf.result() - assert res.output.out == "hello from pydra" + assert res.outputs.out.strip() == "hello from pydra" @need_singularity def test_singularity_wf_state_inputspec_1(plugin, tmp_path): - """a customized input spec for workflow with singularity tasks that has a state""" + """a customized input definition for workflow with singularity tasks that has a state""" file_1 = tmp_path / "file_pydra.txt" file_2 = tmp_path / "file_nice.txt" with open(file_1, "w") as f: @@ -684,52 +530,41 @@ def test_singularity_wf_state_inputspec_1(plugin, tmp_path): filename = [str(file_1), str(file_2)] image = "docker://alpine" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), + Singu = shell.define( + cmd, + 
inputs=[ + shell.arg( + name="file", + type=File, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellSpec,), ) - wf = Workflow(name="wf", input_spec=["cmd", "file"], cache_dir=tmp_path) - wf.inputs.cmd = cmd - - singu = ShellCommandTask( - name="singu", - environment=Singularity(image=image), - executable=wf.lzin.cmd, - file=wf.lzin.file, - input_spec=my_input_spec, - strip=True, - ) - wf.add(singu) - wf.split("file", file=filename) + @workflow.define + def Workflow(cmd: str, file: File) -> str: + singu = workflow.add( + Singu(executable=cmd, file=file), + environment=Singularity(image=image), + ) + return singu.stdout - wf.set_output([("out", wf.singu.lzout.stdout)]) + wf = Workflow(cmd=cmd).split("file", file=filename) - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + res = sub(wf) - res = wf.result() - assert res[0].output.out == "hello from pydra" - assert res[1].output.out == "have a nice one" + assert [o.strip() for o in res.outputs.out] == [ + "hello from pydra", + "have a nice one", + ] @need_singularity def test_singularity_wf_ndst_inputspec_1(plugin, tmp_path): - """a customized input spec for workflow with singularity tasks with states""" + """a customized input definition for workflow with singularity tasks with states""" file_1 = tmp_path / "file_pydra.txt" file_2 = tmp_path / "file_nice.txt" with open(file_1, "w") as f: @@ -741,42 +576,33 @@ def test_singularity_wf_ndst_inputspec_1(plugin, tmp_path): filename = [str(file_1), str(file_2)] image = "docker://alpine" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellSpec,), ) - wf = Workflow(name="wf", input_spec=["cmd", "file"], cache_dir=tmp_path) - wf.inputs.cmd = cmd - wf.inputs.file = filename - - singu = ShellCommandTask( - name="singu", - environment=Singularity(image=image), - executable=wf.lzin.cmd, - input_spec=my_input_spec, - strip=True, - ).split("file", file=wf.lzin.file) - wf.add(singu) + @workflow.define + def Workflow(cmd: str, files: list[File]) -> list[str]: + singu = workflow.add( + Singu(executable=cmd).split(file=files), + environment=Singularity(image=image), + ) + return singu.stdout - wf.set_output([("out", wf.singu.lzout.stdout)]) + wf = Workflow(cmd=cmd, files=filename) - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + res = sub(wf) - res = wf.result() - assert res.output.out == ["hello from pydra", "have a nice one"] + assert [o.strip() for o in res.outputs.out] == [ + "hello from pydra", + "have a nice one", + ] diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index 8221751d01..3df84d5b69 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -1,237 +1,193 @@ from pathlib import Path import typing as ty -import os -import attrs -from copy import deepcopy import time - -from ..specs import ( - BaseSpec, - SpecInfo, - File, +import pytest +from fileformats.generic import File +from pydra.engine.specs import ( Runtime, Result, - ShellSpec, - # ContainerSpec, - LazyIn, - LazyOut, - LazyField, - StateArray, + WorkflowDef, ) -from ..helpers import make_klass -from .utils import foo -from 
pydra import mark, Workflow -import pytest +from pydra.engine.lazy import ( + LazyInField, + LazyOutField, +) +from pydra.engine.core import Workflow +from pydra.engine.node import Node +from pydra.engine.submitter import Submitter, NodeExecution, DiGraph +from pydra.design import python, workflow +from .utils import Foo, FunAddTwo, FunAddVar, ListSum -def test_basespec(): - spec = BaseSpec() - assert spec.hash == "0b1d98df22ecd1733562711c205abca2" +@workflow.define +def TestWorkflow(x: int, y: list[int]) -> int: + node_a = workflow.add(FunAddTwo(a=x), name="A") + node_b = workflow.add(FunAddVar(a=node_a.out).split(b=y).combine("b"), name="B") + node_c = workflow.add(ListSum(x=node_b.out), name="C") + return node_c.out -def test_runtime(): - runtime = Runtime() - assert hasattr(runtime, "rss_peak_gb") - assert hasattr(runtime, "vms_peak_gb") - assert hasattr(runtime, "cpu_peak_percent") - +@pytest.fixture +def workflow_task(submitter: Submitter) -> WorkflowDef: + wf = TestWorkflow(x=1, y=[1, 2, 3]) + with submitter: + submitter(wf) + return wf -def test_result(): - result = Result() - assert hasattr(result, "runtime") - assert hasattr(result, "output") - assert hasattr(result, "errored") - assert getattr(result, "errored") is False +@pytest.fixture +def wf(workflow_task: WorkflowDef) -> Workflow: + wf = Workflow.construct(workflow_task) + for n in wf.nodes: + if n._state: + n._state.prepare_states(inputs=n.state_values) + n._state.prepare_inputs() + return wf -def test_shellspec(): - with pytest.raises(TypeError): - spec = ShellSpec() - spec = ShellSpec(executable="ls") # (executable, args) - assert hasattr(spec, "executable") - assert hasattr(spec, "args") +@pytest.fixture +def submitter(tmp_path) -> Submitter: + return Submitter(tmp_path) -class NodeTesting: - @attrs.define() - class Input: - inp_a: str = "A" - inp_b: str = "B" - def __init__(self): - class InpSpec: - def __init__(self): - self.fields = [("inp_a", int), ("inp_b", int)] +@pytest.fixture +def graph(wf: Workflow, submitter: Submitter) -> DiGraph[NodeExecution]: + return wf.execution_graph(submitter=submitter) - class OutSpec: - def __init__(self): - self.fields = [("out_a", int)] - self.name = "tn" - self.inputs = self.Input() - self.input_spec = InpSpec() - self.output_spec = OutSpec() - self.output_names = ["out_a"] - self.state = None +@pytest.fixture +def node_a(wf) -> Node: + return wf["A"] # we can pick any node to retrieve the values to - def result(self, state_index=None): - class Output: - def __init__(self): - self.out_a = "OUT_A" - - class Result: - def __init__(self): - self.output = Output() - self.errored = False - - def get_output_field(self, field): - return getattr(self.output, field) - - return Result() +def test_runtime(): + runtime = Runtime() + assert hasattr(runtime, "rss_peak_gb") + assert hasattr(runtime, "vms_peak_gb") + assert hasattr(runtime, "cpu_peak_percent") -class WorkflowTesting: - def __init__(self): - class Input: - def __init__(self): - self.inp_a = "A" - self.inp_b = "B" - self.inputs = Input() - self.tn = NodeTesting() +def test_result(tmp_path): + result = Result(output_dir=tmp_path) + assert hasattr(result, "runtime") + assert hasattr(result, "outputs") + assert hasattr(result, "errored") + assert getattr(result, "errored") is False -def test_lazy_inp(): - tn = NodeTesting() - lzin = LazyIn(task=tn) +def test_lazy_inp(wf: Workflow, graph: DiGraph[NodeExecution]): + lf = LazyInField(field="x", type=int, workflow=wf) + assert lf._get_value(workflow=wf, graph=graph) == 1 - lf = 
lzin.inp_a - assert lf.get_value(wf=WorkflowTesting()) == "A" + lf = LazyInField(field="y", type=str, workflow=wf) + assert lf._get_value(workflow=wf, graph=graph) == [1, 2, 3] - lf = lzin.inp_b - assert lf.get_value(wf=WorkflowTesting()) == "B" +def test_lazy_out(node_a, wf, graph): + lf = LazyOutField(field="out", type=int, node=node_a) + assert lf._get_value(wf, graph) == 3 -def test_lazy_out(): - tn = NodeTesting() - lzout = LazyOut(task=tn) - lf = lzout.out_a - assert lf.get_value(wf=WorkflowTesting()) == "OUT_A" +def test_input_file_hash_1(tmp_path): -def test_lazy_getvale(): - tn = NodeTesting() - lf = LazyIn(task=tn) - with pytest.raises(Exception) as excinfo: - lf.inp_c - assert ( - str(excinfo.value) - == "Task 'tn' has no input attribute 'inp_c', available: 'inp_a', 'inp_b'" - ) + outfile = tmp_path / "test.file" + outfile.touch() + @python.define + def A(in_file: File) -> File: + return in_file -def test_input_file_hash_1(tmp_path): - os.chdir(tmp_path) - outfile = "test.file" - fields = [("in_file", ty.Any)] - input_spec = SpecInfo(name="Inputs", fields=fields, bases=(BaseSpec,)) - inputs = make_klass(input_spec) - assert inputs(in_file=outfile).hash == "9a106eb2830850834d9b5bf098d5fa85" + assert A(in_file=outfile)._hash == "e708da65b720212c5ce9ed2c65aae59c" with open(outfile, "w") as fp: fp.write("test") - fields = [("in_file", File)] - input_spec = SpecInfo(name="Inputs", fields=fields, bases=(BaseSpec,)) - inputs = make_klass(input_spec) - assert inputs(in_file=outfile).hash == "02fa5f6f1bbde7f25349f54335e1adaf" + + assert A(in_file=outfile)._hash == "f726a193430352bb3b92dccf5eccff3a" def test_input_file_hash_2(tmp_path): - """input spec with File types, checking when the checksum changes""" + """input definition with File types, checking when the checksum changes""" file = tmp_path / "in_file_1.txt" with open(file, "w") as f: f.write("hello") - input_spec = SpecInfo(name="Inputs", fields=[("in_file", File)], bases=(BaseSpec,)) - inputs = make_klass(input_spec) + @python.define + def A(in_file: File) -> File: + return in_file # checking specific hash value - hash1 = inputs(in_file=file).hash - assert hash1 == "aaa50d60ed33d3a316d58edc882a34c3" + hash1 = A(in_file=file)._hash + assert hash1 == "eba2fafb8df4bae94a7aa42bb159b778" # checking if different name doesn't affect the hash file_diffname = tmp_path / "in_file_2.txt" with open(file_diffname, "w") as f: f.write("hello") - hash2 = inputs(in_file=file_diffname).hash + hash2 = A(in_file=file_diffname)._hash assert hash1 == hash2 # checking if different content (the same name) affects the hash - time.sleep(2) # ensure mtime is different file_diffcontent = tmp_path / "in_file_1.txt" with open(file_diffcontent, "w") as f: f.write("hi") - hash3 = inputs(in_file=file_diffcontent).hash + hash3 = A(in_file=file_diffcontent)._hash assert hash1 != hash3 def test_input_file_hash_2a(tmp_path): - """input spec with ty.Union[File, ...] type, checking when the checksum changes""" + """input definition with ty.Union[File, ...] 
type, checking when the checksum changes""" file = tmp_path / "in_file_1.txt" with open(file, "w") as f: f.write("hello") - input_spec = SpecInfo( - name="Inputs", fields=[("in_file", ty.Union[File, int])], bases=(BaseSpec,) - ) - inputs = make_klass(input_spec) + @python.define + def A(in_file: ty.Union[File, int]) -> File: + return in_file # checking specific hash value - hash1 = inputs(in_file=file).hash - assert hash1 == "aaa50d60ed33d3a316d58edc882a34c3" + hash1 = A(in_file=file)._hash + assert hash1 == "eba2fafb8df4bae94a7aa42bb159b778" # checking if different name doesn't affect the hash file_diffname = tmp_path / "in_file_2.txt" with open(file_diffname, "w") as f: f.write("hello") - hash2 = inputs(in_file=file_diffname).hash + hash2 = A(in_file=file_diffname)._hash assert hash1 == hash2 + # checking if string is also accepted + hash3 = A(in_file=str(file))._hash + assert hash3 == hash1 + # checking if different content (the same name) affects the hash - time.sleep(2) # ensure mtime is different file_diffcontent = tmp_path / "in_file_1.txt" with open(file_diffcontent, "w") as f: f.write("hi") - hash3 = inputs(in_file=file_diffcontent).hash - assert hash1 != hash3 - - # checking if string is also accepted - hash4 = inputs(in_file=str(file)).hash - assert hash4 == "800af2b5b334c9e3e5c40c0e49b7ffb5" + hash4 = A(in_file=file_diffcontent)._hash + assert hash1 != hash4 def test_input_file_hash_3(tmp_path): - """input spec with File types, checking when the hash and file_hash change""" + """input definition with File types, checking when the hash and file_hash change""" file = tmp_path / "in_file_1.txt" with open(file, "w") as f: f.write("hello") - input_spec = SpecInfo( - name="Inputs", fields=[("in_file", File), ("in_int", int)], bases=(BaseSpec,) - ) - inputs = make_klass(input_spec) + @python.define + def A(in_file: File, in_int: int) -> File: + return in_file, in_int - my_inp = inputs(in_file=file, in_int=3) + a = A(in_file=file, in_int=3) # original hash and files_hash (dictionary contains info about files) - hash1 = my_inp.hash + hash1 = a._hash # files_hash1 = deepcopy(my_inp.files_hash) # file name should be in files_hash1[in_file] filename = str(Path(file)) # assert filename in files_hash1["in_file"] # changing int input - my_inp.in_int = 5 - hash2 = my_inp.hash + a.in_int = 5 + hash2 = a._hash # files_hash2 = deepcopy(my_inp.files_hash) # hash should be different assert hash1 != hash2 @@ -244,7 +200,7 @@ def test_input_file_hash_3(tmp_path): with open(file, "w") as f: f.write("hello") - hash3 = my_inp.hash + hash3 = a._hash # files_hash3 = deepcopy(my_inp.files_hash) # hash should be the same, # but the entry for in_file in files_hash should be different (modification time) @@ -256,42 +212,39 @@ def test_input_file_hash_3(tmp_path): # assert files_hash3["in_file"][filename][1] == files_hash2["in_file"][filename][1] # setting the in_file again - my_inp.in_file = file + a.in_file = file # filename should be removed from files_hash # assert my_inp.files_hash["in_file"] == {} # will be saved again when hash is calculated - assert my_inp.hash == hash3 + assert a._hash == hash3 # assert filename in my_inp.files_hash["in_file"] def test_input_file_hash_4(tmp_path): - """input spec with nested list, that contain ints and Files, + """input definition with nested list, that contain ints and Files, checking changes in checksums """ file = tmp_path / "in_file_1.txt" with open(file, "w") as f: f.write("hello") - input_spec = SpecInfo( - name="Inputs", - fields=[("in_file", 
ty.List[ty.List[ty.Union[int, File]]])], - bases=(BaseSpec,), - ) - inputs = make_klass(input_spec) + @python.define + def A(in_file: ty.List[ty.List[ty.Union[int, File]]]) -> File: + return in_file # checking specific hash value - hash1 = inputs(in_file=[[file, 3]]).hash - assert hash1 == "0693adbfac9f675af87e900065b1de00" + hash1 = A(in_file=[[file, 3]])._hash + assert hash1 == "b583e0fd5501d3bed9bf510ce2a9e379" # the same file, but int field changes - hash1a = inputs(in_file=[[file, 5]]).hash + hash1a = A(in_file=[[file, 5]])._hash assert hash1 != hash1a # checking if different name doesn't affect the hash file_diffname = tmp_path / "in_file_2.txt" with open(file_diffname, "w") as f: f.write("hello") - hash2 = inputs(in_file=[[file_diffname, 3]]).hash + hash2 = A(in_file=[[file_diffname, 3]])._hash assert hash1 == hash2 # checking if different content (the same name) affects the hash @@ -299,36 +252,33 @@ def test_input_file_hash_4(tmp_path): file_diffcontent = tmp_path / "in_file_1.txt" with open(file_diffcontent, "w") as f: f.write("hi") - hash3 = inputs(in_file=[[file_diffcontent, 3]]).hash + hash3 = A(in_file=[[file_diffcontent, 3]])._hash assert hash1 != hash3 def test_input_file_hash_5(tmp_path): - """input spec with File in nested containers, checking changes in checksums""" + """input definition with File in nested containers, checking changes in checksums""" file = tmp_path / "in_file_1.txt" with open(file, "w") as f: f.write("hello") - input_spec = SpecInfo( - name="Inputs", - fields=[("in_file", ty.List[ty.Dict[ty.Any, ty.Union[File, int]]])], - bases=(BaseSpec,), - ) - inputs = make_klass(input_spec) + @python.define + def A(in_file: ty.List[ty.Dict[ty.Any, ty.Union[File, int]]]) -> File: + return in_file # checking specific hash value - hash1 = inputs(in_file=[{"file": file, "int": 3}]).hash - assert hash1 == "56e6e2c9f3bdf0cd5bd3060046dea480" + hash1 = A(in_file=[{"file": file, "int": 3}])._hash + assert hash1 == "aa2d4b708ed0dd8340582a6514bfd5ce" # the same file, but int field changes - hash1a = inputs(in_file=[{"file": file, "int": 5}]).hash + hash1a = A(in_file=[{"file": file, "int": 5}])._hash assert hash1 != hash1a # checking if different name doesn't affect the hash file_diffname = tmp_path / "in_file_2.txt" with open(file_diffname, "w") as f: f.write("hello") - hash2 = inputs(in_file=[{"file": file_diffname, "int": 3}]).hash + hash2 = A(in_file=[{"file": file_diffname, "int": 3}])._hash assert hash1 == hash2 # checking if different content (the same name) affects the hash @@ -336,67 +286,33 @@ def test_input_file_hash_5(tmp_path): file_diffcontent = tmp_path / "in_file_1.txt" with open(file_diffcontent, "w") as f: f.write("hi") - hash3 = inputs(in_file=[{"file": file_diffcontent, "int": 3}]).hash + hash3 = A(in_file=[{"file": file_diffcontent, "int": 3}])._hash assert hash1 != hash3 -def test_lazy_field_cast(): - task = foo(a="a", b=1, c=2.0, name="foo") - - assert task.lzout.y.type == int - assert task.lzout.y.cast(float).type == float - - -def test_lazy_field_multi_same_split(): - @mark.task - def f(x: ty.List[int]) -> ty.List[int]: - return x +def test_lazy_field_cast(wf: Workflow): + lzout = wf.add(Foo(a="a", b=1, c=2.0), name="foo") - task = f(x=[1, 2, 3], name="foo") + assert lzout.y._type is int + assert workflow.cast(lzout.y, float)._type is float - lf = task.lzout.out.split("foo.x") - assert lf.type == StateArray[int] - assert lf.splits == set([(("foo.x",),)]) - - lf2 = lf.split("foo.x") - assert lf2.type == StateArray[int] - assert lf2.splits == 
set([(("foo.x",),)]) - - -def test_lazy_field_multi_diff_split(): - @mark.task - def f(x: ty.Any, y: ty.Any) -> ty.Any: - return x - - task = f(x=[1, 2, 3], name="foo") - - lf = task.lzout.out.split("foo.x") - - assert lf.type == StateArray[ty.Any] - assert lf.splits == set([(("foo.x",),)]) - - lf2 = lf.split("foo.x") - assert lf2.type == StateArray[ty.Any] - assert lf2.splits == set([(("foo.x",),)]) - - lf3 = lf.split("foo.y") - assert lf3.type == StateArray[StateArray[ty.Any]] - assert lf3.splits == set([(("foo.x",),), (("foo.y",),)]) - - -def test_wf_lzin_split(): - @mark.task +def test_wf_lzin_split(tmp_path): + @python.define def identity(x: int) -> int: return x - inner = Workflow(name="inner", input_spec=["x"]) - inner.add(identity(x=inner.lzin.x, name="f")) - inner.set_output(("out", inner.f.lzout.out)) + @workflow.define + def Inner(x): + ident = workflow.add(identity(x=x)) + return ident.out + + @workflow.define + def Outer(xs): + inner = workflow.add(Inner().split(x=xs)) + return inner.out - outer = Workflow(name="outer", input_spec=["x"]) - outer.add(inner.split(x=outer.lzin.x)) - outer.set_output(("out", outer.inner.lzout.out)) + outer = Outer(xs=[1, 2, 3]) - result = outer(x=[1, 2, 3]) - assert result.output.out == StateArray([1, 2, 3]) + outputs = outer(cache_dir=tmp_path) + assert outputs.out == [1, 2, 3] diff --git a/pydra/engine/tests/test_state.py b/pydra/engine/tests/test_state.py index c8ef0941ca..adf6b02134 100644 --- a/pydra/engine/tests/test_state.py +++ b/pydra/engine/tests/test_state.py @@ -1,6 +1,8 @@ import pytest from ..state import State +from pydra.design import python +from pydra.engine.specs import PythonDef, PythonOutputs from ..helpers_state import PydraStateError, add_name_splitter @@ -97,25 +99,25 @@ def test_state_1( def test_state_2_err(): with pytest.raises(PydraStateError) as exinfo: - State("NA", splitter={"a"}) + State(name="NA", splitter={"a"}) assert "splitter has to be a string, a tuple or a list" == str(exinfo.value) def test_state_3_err(): with pytest.raises(PydraStateError) as exinfo: - State("NA", splitter=["a", "b"], combiner=("a", "b")) + State(name="NA", splitter=["a", "b"], combiner=("a", "b")) assert "combiner has to be a string or a list" == str(exinfo.value) def test_state_4_err(): - st = State("NA", splitter="a", combiner=["a", "b"]) + st = State(name="NA", splitter="a", combiner=["a", "b"]) with pytest.raises(PydraStateError) as exinfo: st.combiner_validation() assert "all combiners have to be in the splitter" in str(exinfo.value) def test_state_5_err(): - st = State("NA", combiner="a") + st = State(name="NA", combiner="a") with pytest.raises(PydraStateError) as exinfo: st.combiner_validation() assert "splitter has to be set before" in str(exinfo.value) @@ -477,6 +479,7 @@ def test_state_connect_1(): no explicit splitter for the second state """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State(name="NB", other_states={"NA": (st1, "b")}) assert st2.splitter == "_NA" assert st2.splitter_rpn == ["NA.a"] @@ -500,7 +503,12 @@ def test_state_connect_1a(): the second state has explicit splitter from the first one (the prev-state part) """ st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter="_NA", other_states={"NA": (st1, "b")}) + st1.prepare_states(inputs={"NA.a": [3, 5]}) + st2 = State( + name="NB", + splitter="_NA", + other_states={"NA": (st1, "b")}, + ) assert st2.splitter == "_NA" assert st2.splitter_rpn == ["NA.a"] @@ -527,7 +535,11 @@ def 
test_state_connect_1b_exception(): def test_state_connect_1c_exception(splitter2, other_states2): """can't ask for splitter from node that is not connected""" with pytest.raises(PydraStateError): - st2 = State(name="NB", splitter=splitter2, other_states=other_states2) + st2 = State( + name="NB", + splitter=splitter2, + other_states=other_states2, + ) st2.splitter_validation() @@ -537,7 +549,12 @@ def test_state_connect_2(): splitter from the first node and a new field (the prev-state and current part) """ st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter=["_NA", "a"], other_states={"NA": (st1, "b")}) + st1.prepare_states(inputs={"NA.a": [3, 5]}) + st2 = State( + name="NB", + splitter=["_NA", "a"], + other_states={"NA": (st1, "b")}, + ) assert st2.splitter == ["_NA", "NB.a"] assert st2.splitter_rpn == ["NA.a", "NB.a", "*"] @@ -581,7 +598,12 @@ def test_state_connect_2a(): adding an additional scalar field that is not part of the splitter """ st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter=["_NA", "a"], other_states={"NA": (st1, "b")}) + st1.prepare_states(inputs={"NA.a": [3, 5]}) + st2 = State( + name="NB", + splitter=["_NA", "a"], + other_states={"NA": (st1, "b")}, + ) assert st2.splitter == ["_NA", "NB.a"] assert st2.splitter_rpn == ["NA.a", "NB.a", "*"] @@ -619,6 +641,7 @@ def test_state_connect_2b(): splitter from the first node (the prev-state part) has to be added """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State(name="NB", splitter="a", other_states={"NA": (st1, "b")}) assert st2.splitter == ["_NA", "NB.a"] @@ -626,7 +649,7 @@ def test_state_connect_2b(): assert st2.current_splitter == "NB.a" assert st2.prev_state_splitter == "_NA" - st2.prepare_states(inputs={"NA.a": [3, 5], "NB.a": [1, 2]}) + st2.prepare_states(inputs={"NB.a": [1, 2]}) assert st2.group_for_inputs_final == {"NA.a": 0, "NB.a": 1} assert st2.groups_stack_final == [[0, 1]] assert st2.states_ind == [ @@ -657,8 +680,13 @@ def test_state_connect_3(): splitter from the previous states (the prev-state part) has to be added """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State(name="NB", splitter="a") - st3 = State(name="NC", other_states={"NA": (st1, "b"), "NB": (st2, "c")}) + st2.prepare_states(inputs={"NB.a": [30, 50]}) + st3 = State( + name="NC", + other_states={"NA": (st1, "b"), "NB": (st2, "c")}, + ) assert st3.splitter == ["_NA", "_NB"] assert st3.splitter_rpn == ["NA.a", "NB.a", "*"] @@ -699,7 +727,9 @@ def test_state_connect_3a(): the third state has explicit splitter that contains splitters from previous states """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5], "NB.a": [30, 50]}) st2 = State(name="NB", splitter="a") + st2.prepare_states(inputs={"NA.a": [3, 5], "NB.a": [30, 50]}) st3 = State( name="NC", splitter=["_NA", "_NB"], @@ -741,9 +771,13 @@ def test_state_connect_3b(): splitter from the second state has to be added (partial prev-state part) """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5], "NB.a": [30, 50]}) st2 = State(name="NB", splitter="a") + st2.prepare_states(inputs={"NA.a": [3, 5], "NB.a": [30, 50]}) st3 = State( - name="NC", splitter="_NB", other_states={"NA": (st1, "b"), "NB": (st2, "c")} + name="NC", + splitter="_NB", + other_states={"NA": (st1, "b"), "NB": (st2, "c")}, ) assert st3.splitter == ["_NA", "_NB"] @@ -780,7 +814,9 @@ def test_state_connect_4(): the third state has explicit 
scalar(!) splitter that contains two previous states """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5], "NB.a": [30, 50]}) st2 = State(name="NB", splitter="a") + st2.prepare_states(inputs={"NA.a": [3, 5], "NB.a": [30, 50]}) st3 = State( name="NC", splitter=("_NA", "_NB"), @@ -811,6 +847,7 @@ def test_state_connect_5(): the second state has no explicit splitter """ st1 = State(name="NA", splitter=["a", "b"]) + st1.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20]}) st2 = State(name="NB", other_states={"NA": (st1, "a")}) assert st2.splitter == "_NA" assert st2.splitter_rpn == ["NA.a", "NA.b", "*"] @@ -841,7 +878,9 @@ def test_state_connect_6(): the third state has explicit splitter with splitters from previous states """ st1 = State(name="NA", splitter=["a", "b"]) + st1.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20], "NB.a": [600, 700]}) st2 = State(name="NB", splitter="a") + st2.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20], "NB.a": [600, 700]}) st3 = State( name="NC", splitter=["_NA", "_NB"], @@ -894,8 +933,13 @@ def test_state_connect_6a(): the third state has no explicit splitter """ st1 = State(name="NA", splitter=["a", "b"]) + st1.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20], "NB.a": [600, 700]}) st2 = State(name="NB", splitter="a") - st3 = State(name="NC", other_states={"NA": (st1, "a"), "NB": (st2, "b")}) + st2.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20], "NB.a": [600, 700]}) + st3 = State( + name="NC", + other_states={"NA": (st1, "a"), "NB": (st2, "b")}, + ) assert st3.splitter == ["_NA", "_NB"] assert st3.splitter_rpn == ["NA.a", "NA.b", "*", "NB.a", "*"] @@ -941,6 +985,7 @@ def test_state_connect_7(): no explicit splitter for the second state """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State(name="NB", other_states={"NA": (st1, ["x", "y"])}) # should take into account that x, y come from the same task assert st2.splitter == "_NA" @@ -967,8 +1012,13 @@ def test_state_connect_8(): and it should give the same as the previous test """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State(name="NB", other_states={"NA": (st1, "b")}) - st3 = State(name="NC", other_states={"NA": (st1, "x"), "NB": (st2, "y")}) + st2.prepare_states(inputs={"NA.a": [3, 5]}) + st3 = State( + name="NC", + other_states={"NA": (st1, "x"), "NB": (st2, "y")}, + ) # x comes from NA and y comes from NB, but NB has only NA's splitter, # so it should be treated as both inputs are from NA state assert st3.splitter == "_NA" @@ -998,9 +1048,18 @@ def test_state_connect_9(): """ st1 = State(name="NA_1", splitter="a") + st1.prepare_states(inputs={"NA_1.a": [3, 5], "NA_2.a": [11, 12]}) st1a = State(name="NA_2", splitter="a") - st2 = State(name="NB", other_states={"NA_1": (st1, "b"), "NA_2": (st1a, "c")}) - st3 = State(name="NC", other_states={"NA_1": (st1, "x"), "NB": (st2, "y")}) + st1a.prepare_states(inputs={"NA_1.a": [3, 5], "NA_2.a": [11, 12]}) + st2 = State( + name="NB", + other_states={"NA_1": (st1, "b"), "NA_2": (st1a, "c")}, + ) + st2.prepare_states(inputs={"NA_1.a": [3, 5], "NA_2.a": [11, 12]}) + st3 = State( + name="NC", + other_states={"NA_1": (st1, "x"), "NB": (st2, "y")}, + ) # x comes from NA_1 and y comes from NB, but NB has only NA_1/2's splitters, assert st3.splitter == ["_NA_1", "_NA_2"] assert st3.splitter_rpn == ["NA_1.a", "NA_2.a", "*"] @@ -1033,7 +1092,12 @@ def test_state_connect_innerspl_1(): the second state has an inner 
splitter, full splitter provided """ st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter=["_NA", "b"], other_states={"NA": (st1, "b")}) + st1.prepare_states(inputs={"NA.a": [3, 5]}) + st2 = State( + name="NB", + splitter=["_NA", "b"], + other_states={"NA": (st1, "b")}, + ) assert st2.splitter == ["_NA", "NB.b"] assert st2.splitter_rpn == ["NA.a", "NB.b", "*"] @@ -1085,6 +1149,9 @@ def test_state_connect_innerspl_1a(): splitter from the first state (the prev-state part) has to be added """ st1 = State(name="NA", splitter="a") + st1.prepare_states( + inputs={"NA.a": [3, 5]}, + ) st2 = State(name="NB", splitter="b", other_states={"NA": (st1, "b")}) assert st2.splitter == ["_NA", "NB.b"] @@ -1136,7 +1203,11 @@ def test_state_connect_innerspl_1b(): """incorrect splitter - the current & prev-state parts in scalar splitter""" with pytest.raises(PydraStateError): st1 = State(name="NA", splitter="a") - State(name="NB", splitter=("_NA", "b"), other_states={"NA": (st1, "b")}) + State( + name="NB", + splitter=("_NA", "b"), + other_states={"NA": (st1, "b")}, + ) def test_state_connect_innerspl_2(): @@ -1145,7 +1216,15 @@ def test_state_connect_innerspl_2(): only the current part of the splitter provided (the prev-state has to be added) """ st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter=["c", "b"], other_states={"NA": (st1, "b")}) + st1.prepare_states( + inputs={"NA.a": [3, 5], "NB.b": [[1, 10, 100], [2, 20, 200]], "NB.c": [13, 17]}, + cont_dim={"NB.b": 2}, # will be treated as 2d container + ) + st2 = State( + name="NB", + splitter=["c", "b"], + other_states={"NA": (st1, "b")}, + ) assert st2.splitter == ["_NA", ["NB.c", "NB.b"]] assert st2.splitter_rpn == ["NA.a", "NB.c", "NB.b", "*", "*"] @@ -1216,7 +1295,15 @@ def test_state_connect_innerspl_2a(): """ st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter=["b", "c"], other_states={"NA": (st1, "b")}) + st1.prepare_states( + inputs={"NA.a": [3, 5], "NB.b": [[1, 10, 100], [2, 20, 200]], "NB.c": [13, 17]}, + cont_dim={"NB.b": 2}, # will be treated as 2d container + ) + st2 = State( + name="NB", + splitter=["b", "c"], + other_states={"NA": (st1, "b")}, + ) assert st2.splitter == ["_NA", ["NB.b", "NB.c"]] assert st2.splitter_rpn == ["NA.a", "NB.b", "NB.c", "*", "*"] @@ -1283,7 +1370,19 @@ def test_state_connect_innerspl_3(): """ st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter=["c", "b"], other_states={"NA": (st1, "b")}) + st1.prepare_states(inputs={"NA.a": [3, 5]}) + st2 = State( + name="NB", + splitter=["c", "b"], + other_states={"NA": (st1, "b")}, + ) + st2.prepare_states( + inputs={ + "NB.b": [[1, 10, 100], [2, 20, 200]], + "NB.c": [13, 17], + }, + cont_dim={"NB.b": 2}, # will be treated as 2d container + ) st3 = State(name="NC", splitter="d", other_states={"NB": (st2, "a")}) assert st3.splitter == ["_NB", "NC.d"] @@ -1422,9 +1521,23 @@ def test_state_connect_innerspl_4(): the third one connected to two previous, only the current part of splitter provided """ st1 = State(name="NA", splitter="a") + st1.prepare_states( + inputs={ + "NA.a": [3, 5], + } + ) st2 = State(name="NB", splitter=["b", "c"]) + st2.prepare_states( + inputs={ + "NA.a": [3, 5], + "NB.b": [10, 20], + "NB.c": [13, 17], + } + ) st3 = State( - name="NC", splitter="d", other_states={"NA": (st1, "e"), "NB": (st2, "f")} + name="NC", + splitter="d", + other_states={"NA": (st1, "e"), "NB": (st2, "f")}, ) assert st3.splitter == [["_NA", "_NB"], "NC.d"] @@ -1527,6 +1640,7 @@ def test_state_combine_1(): def 
test_state_connect_combine_1(): """two connected states; outer splitter and combiner in the first one""" st1 = State(name="NA", splitter=["a", "b"], combiner="a") + st1.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20]}) st2 = State(name="NB", other_states={"NA": (st1, "c")}) assert st1.splitter == ["NA.a", "NA.b"] @@ -1572,6 +1686,9 @@ def test_state_connect_combine_2(): additional splitter in the second node """ st1 = State(name="NA", splitter=["a", "b"], combiner="a") + st1.prepare_states( + inputs={"NA.a": [3, 5], "NA.b": [10, 20], "NB.c": [90, 150], "NB.d": [0, 1]} + ) st2 = State(name="NB", splitter="d", other_states={"NA": (st1, "c")}) assert st1.splitter == ["NA.a", "NA.b"] @@ -1634,7 +1751,13 @@ def test_state_connect_combine_3(): additional splitter in the second node """ st1 = State(name="NA", splitter=["a", "b"], combiner="a") - st2 = State(name="NB", splitter="d", combiner="d", other_states={"NA": (st1, "c")}) + st1.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20]}) + st2 = State( + name="NB", + splitter="d", + combiner="d", + other_states={"NA": (st1, "c")}, + ) assert st1.splitter == ["NA.a", "NA.b"] assert st1.splitter_rpn == ["NA.a", "NA.b", "*"] @@ -1699,8 +1822,12 @@ def test_state_connect_innerspl_combine_1(): """one previous node and one inner splitter (and inner splitter combiner); only current part provided - the prev-state part had to be added""" st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State( - name="NB", splitter=["c", "b"], combiner=["b"], other_states={"NA": (st1, "b")} + name="NB", + splitter=["c", "b"], + combiner=["b"], + other_states={"NA": (st1, "b")}, ) assert st2.splitter == ["_NA", ["NB.c", "NB.b"]] @@ -1780,8 +1907,12 @@ def test_state_connect_innerspl_combine_2(): the prev-state part has to be added """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State( - name="NB", splitter=["c", "b"], combiner=["c"], other_states={"NA": (st1, "b")} + name="NB", + splitter=["c", "b"], + combiner=["c"], + other_states={"NA": (st1, "b")}, ) assert st2.splitter == ["_NA", ["NB.c", "NB.b"]] @@ -1856,7 +1987,12 @@ def test_state_connect_combine_prevst_1(): (i.e. from the prev-state part of the splitter), """ st1 = State(name="NA", splitter="a") - st2 = State(name="NB", other_states={"NA": (st1, "b")}, combiner="NA.a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) + st2 = State( + name="NB", + other_states={"NA": (st1, "b")}, + combiner="NA.a", + ) assert st2.splitter == "_NA" assert st2.splitter_rpn == ["NA.a"] assert ( @@ -1886,7 +2022,12 @@ def test_state_connect_combine_prevst_2(): (i.e. from the prev-state part of the splitter), """ st1 = State(name="NA", splitter=["a", "b"]) - st2 = State(name="NB", other_states={"NA": (st1, "b")}, combiner="NA.a") + st1.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20]}) + st2 = State( + name="NB", + other_states={"NA": (st1, "b")}, + combiner="NA.a", + ) assert st2.splitter == "_NA" assert st2.splitter_rpn == ["NA.a", "NA.b", "*"] assert st2.combiner == ["NA.a"] @@ -1894,7 +2035,7 @@ def test_state_connect_combine_prevst_2(): assert st2.current_combiner_all == st2.current_combiner == [] assert st2.splitter_rpn_final == ["NA.b"] - st2.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20]}) + st2.prepare_states(inputs={}) assert st2.group_for_inputs_final == {"NA.b": 0} assert st2.groups_stack_final == [[0]] assert st2.states_ind == [ @@ -1922,14 +2063,20 @@ def test_state_connect_combine_prevst_3(): (i.e. 
from the prev-state part of the splitter), """ st1 = State(name="NA", splitter=["a", "b"]) + st1.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20]}) st2 = State(name="NB", other_states={"NA": (st1, "b")}) - st3 = State(name="NC", other_states={"NB": (st2, "c")}, combiner="NA.a") + st2.prepare_states(inputs={}) + st3 = State( + name="NC", + other_states={"NB": (st2, "c")}, + combiner="NA.a", + ) assert st3.splitter == "_NB" assert st3.splitter_rpn == ["NA.a", "NA.b", "*"] assert st3.combiner == ["NA.a"] assert st3.splitter_rpn_final == ["NA.b"] - st3.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20]}) + st3.prepare_states(inputs={}) assert st3.group_for_inputs_final == {"NA.b": 0} assert st3.groups_stack_final == [[0]] @@ -1958,7 +2105,9 @@ def test_state_connect_combine_prevst_4(): the third state has also combiner from the prev-state part """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State(name="NB", splitter="a") + st2.prepare_states(inputs={"NB.a": [600, 700]}) st3 = State( name="NC", splitter=["_NA", "_NB"], @@ -2010,7 +2159,9 @@ def test_state_connect_combine_prevst_5(): the third state has also combiner from the prev-state part """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State(name="NB", splitter="a") + st2.prepare_states(inputs={"NB.a": [600, 700]}) st3 = State( name="NC", splitter=("_NA", "_NB"), @@ -2044,8 +2195,12 @@ def test_state_connect_combine_prevst_6(): (i.e. from the prev-state part of the splitter), """ st1 = State(name="NA", splitter=["a", "b"]) + st1.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20]}) st2 = State( - name="NB", splitter="c", other_states={"NA": (st1, "b")}, combiner="NA.a" + name="NB", + splitter="c", + other_states={"NA": (st1, "b")}, + combiner="NA.a", ) assert st2.splitter == ["_NA", "NB.c"] assert st2.splitter_rpn == ["NA.a", "NA.b", "*", "NB.c", "*"] @@ -2098,10 +2253,32 @@ def test_state_connect_combine_prevst_6(): ] +@python.define +class ExampleDef(PythonDef["ExampleDef.Outputs"]): + + a: int + b: int + + class Outputs(PythonOutputs): + c: int + + def function(self): + return self.Outputs(c=self.inputs.a + self.inputs.b) + + +example_def = ExampleDef(a=1, b=2) + + @pytest.mark.parametrize( "splitter, other_states, expected_splitter, expected_prevst, expected_current", [ - (None, {"NA": (State(name="NA", splitter="a"), "b")}, "_NA", "_NA", None), + ( + None, + {"NA": (State(name="NA", splitter="a"), "b")}, + "_NA", + "_NA", + None, + ), ( "b", {"NA": (State(name="NA", splitter="a"), "b")}, @@ -2161,8 +2338,14 @@ def test_connect_splitters( @pytest.mark.parametrize( "splitter, other_states", [ - (("_NA", "b"), {"NA": (State(name="NA", splitter="a"), "b")}), - (["b", "_NA"], {"NA": (State(name="NA", splitter="a"), "b")}), + ( + ("_NA", "b"), + {"NA": (State(name="NA", splitter="a"), "b")}, + ), + ( + ["b", "_NA"], + {"NA": (State(name="NA", splitter="a"), "b")}, + ), ( ["_NB", ["_NA", "b"]], { @@ -2174,7 +2357,11 @@ def test_connect_splitters( ) def test_connect_splitters_exception_1(splitter, other_states): with pytest.raises(PydraStateError) as excinfo: - State(name="CN", splitter=splitter, other_states=other_states) + State( + name="CN", + splitter=splitter, + other_states=other_states, + ) assert "prev-state and current splitters are mixed" in str(excinfo.value) @@ -2194,6 +2381,9 @@ def test_connect_splitters_exception_3(): State( name="CN", splitter="_NB", - other_states=["NA", (State(name="NA", splitter="a"), "b")], + 
other_states=[ + "NA", + (State(name="NA", splitter="a"), "b"), + ], ) assert "other states has to be a dictionary" == str(excinfo.value) diff --git a/pydra/engine/tests/test_submitter.py b/pydra/engine/tests/test_submitter.py index 298e7e74b4..ddce586cd5 100644 --- a/pydra/engine/tests/test_submitter.py +++ b/pydra/engine/tests/test_submitter.py @@ -5,83 +5,85 @@ import time import attrs import typing as ty -from random import randint import os from unittest.mock import patch import pytest +from pydra.design import workflow from fileformats.generic import Directory from .utils import ( need_sge, need_slurm, - gen_basic_wf, - gen_basic_wf_with_threadcount, - gen_basic_wf_with_threadcount_concurrent, + BasicWorkflow, + BasicWorkflowWithThreadCount, + BasicWorkflowWithThreadCountConcurrent, ) -from ..core import Workflow, TaskBase +from ..core import Task from ..submitter import Submitter -from ..workers import SerialWorker -from ... import mark +from ..workers import DebugWorker +from pydra.design import python from pathlib import Path from datetime import datetime +from pydra.engine.specs import Result -@mark.task -def sleep_add_one(x): +@python.define +def SleepAddOne(x): time.sleep(1) return x + 1 def test_callable_wf(plugin, tmpdir): - wf = gen_basic_wf() - res = wf() - assert res.output.out == 9 - del wf, res + wf = BasicWorkflow(x=5) + outputs = wf(cache_dir=tmpdir) + assert outputs.out == 9 + del wf, outputs # providing plugin - wf = gen_basic_wf() - res = wf(plugin="cf") - assert res.output.out == 9 - del wf, res + wf = BasicWorkflow(x=5) + outputs = wf(worker="cf") + assert outputs.out == 9 + del wf, outputs # providing plugin_kwargs - wf = gen_basic_wf() - res = wf(plugin="cf", plugin_kwargs={"n_procs": 2}) - assert res.output.out == 9 - del wf, res + wf = BasicWorkflow(x=5) + outputs = wf(worker="cf", n_procs=2) + assert outputs.out == 9 + del wf, outputs # providing wrong plugin_kwargs - wf = gen_basic_wf() + wf = BasicWorkflow(x=5) with pytest.raises(TypeError, match="an unexpected keyword argument"): - wf(plugin="cf", plugin_kwargs={"sbatch_args": "-N2"}) + wf(worker="cf", sbatch_args="-N2") # providing submitter - wf = gen_basic_wf() - wf.cache_dir = tmpdir - sub = Submitter(plugin) - res = wf(submitter=sub) - assert res.output.out == 9 + wf = BasicWorkflow(x=5) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + res = sub(wf) + assert res.outputs.out == 9 def test_concurrent_wf(plugin, tmpdir): # concurrent workflow # A --> C # B --> D - wf = Workflow("new_wf", input_spec=["x", "y"]) - wf.inputs.x = 5 - wf.inputs.y = 10 - wf.add(sleep_add_one(name="taska", x=wf.lzin.x)) - wf.add(sleep_add_one(name="taskb", x=wf.lzin.y)) - wf.add(sleep_add_one(name="taskc", x=wf.taska.lzout.out)) - wf.add(sleep_add_one(name="taskd", x=wf.taskb.lzout.out)) - wf.set_output([("out1", wf.taskc.lzout.out), ("out2", wf.taskd.lzout.out)]) - wf.cache_dir = tmpdir - - with Submitter(plugin) as sub: - sub(wf) + @workflow.define(outputs=["out1", "out2"]) + def Workflow(x, y): + taska = workflow.add(SleepAddOne(x=x), name="taska") + taskb = workflow.add(SleepAddOne(x=y), name="taskb") + taskc = workflow.add(SleepAddOne(x=taska.out), name="taskc") + taskd = workflow.add(SleepAddOne(x=taskb.out), name="taskd") + return taskc.out, taskd.out + + wf = Workflow(x=5, y=10) - res = wf.result() - assert res.output.out1 == 7 - assert res.output.out2 == 12 + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert not results.errored, " ".join(results.errors["error 
message"]) + outputs = results.outputs + assert outputs.out1 == 7 + assert outputs.out2 == 12 def test_concurrent_wf_nprocs(tmpdir): @@ -89,49 +91,49 @@ def test_concurrent_wf_nprocs(tmpdir): # setting n_procs in Submitter that is passed to the worker # A --> C # B --> D - wf = Workflow("new_wf", input_spec=["x", "y"]) - wf.inputs.x = 5 - wf.inputs.y = 10 - wf.add(sleep_add_one(name="taska", x=wf.lzin.x)) - wf.add(sleep_add_one(name="taskb", x=wf.lzin.y)) - wf.add(sleep_add_one(name="taskc", x=wf.taska.lzout.out)) - wf.add(sleep_add_one(name="taskd", x=wf.taskb.lzout.out)) - wf.set_output([("out1", wf.taskc.lzout.out), ("out2", wf.taskd.lzout.out)]) - wf.cache_dir = tmpdir - - with Submitter("cf", n_procs=2) as sub: - sub(wf) + @workflow.define(outputs=["out1", "out2"]) + def Workflow(x, y): + taska = workflow.add(SleepAddOne(x=x), name="taska") + taskb = workflow.add(SleepAddOne(x=y), name="taskb") + taskc = workflow.add(SleepAddOne(x=taska.out), name="taskc") + taskd = workflow.add(SleepAddOne(x=taskb.out), name="taskd") + return taskc.out, taskd.out + + wf = Workflow(x=5, y=10) + with Submitter(worker="cf", n_procs=2, cache_dir=tmpdir) as sub: + res = sub(wf) - res = wf.result() - assert res.output.out1 == 7 - assert res.output.out2 == 12 + assert not res.errored, " ".join(res.errors["error message"]) + outputs = res.outputs + assert outputs.out1 == 7 + assert outputs.out2 == 12 def test_wf_in_wf(plugin, tmpdir): """WF(A --> SUBWF(A --> B) --> B)""" - wf = Workflow(name="wf_in_wf", input_spec=["x"]) - wf.inputs.x = 3 - wf.add(sleep_add_one(name="wf_a", x=wf.lzin.x)) # workflow task - subwf = Workflow(name="sub_wf", input_spec=["x"]) - subwf.add(sleep_add_one(name="sub_a", x=subwf.lzin.x)) - subwf.add(sleep_add_one(name="sub_b", x=subwf.sub_a.lzout.out)) - subwf.set_output([("out", subwf.sub_b.lzout.out)]) - # connect, then add - subwf.inputs.x = wf.wf_a.lzout.out - subwf.cache_dir = tmpdir - - wf.add(subwf) - wf.add(sleep_add_one(name="wf_b", x=wf.sub_wf.lzout.out)) - wf.set_output([("out", wf.wf_b.lzout.out)]) - wf.cache_dir = tmpdir - - with Submitter(plugin) as sub: - sub(wf) + @workflow.define + def SubWf(x): + sub_a = workflow.add(SleepAddOne(x=x), name="sub_a") + sub_b = workflow.add(SleepAddOne(x=sub_a.out), name="sub_b") + return sub_b.out + + @workflow.define + def WfInWf(x): + a = workflow.add(SleepAddOne(x=x), name="a") + subwf = workflow.add(SubWf(x=a.out), name="subwf") + b = workflow.add(SleepAddOne(x=subwf.out), name="b") + return b.out + + wf = WfInWf(x=3) - res = wf.result() - assert res.output.out == 7 + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert not results.errored, " ".join(results.errors["error message"]) + outputs = results.outputs + assert outputs.out == 7 @pytest.mark.flaky(reruns=2) # when dask @@ -139,60 +141,59 @@ def test_wf2(plugin_dask_opt, tmpdir): """workflow as a node workflow-node with one task and no splitter """ - wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(sleep_add_one(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wfnd.inputs.x = 2 - wf = Workflow(name="wf", input_spec=["x"]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x): + add2 = workflow.add(SleepAddOne(x=x)) + return add2.out - with Submitter(plugin=plugin_dask_opt) as sub: - sub(wf) + @workflow.define + def Workflow(x): + wfnd = workflow.add(Wfnd(x=x)) + return wfnd.out + + wf = Workflow(x=2) - res = wf.result() - assert 
res.output.out == 3 + with Submitter(worker=plugin_dask_opt, cache_dir=tmpdir) as sub: + res = sub(wf) + + assert res.outputs.out == 3 @pytest.mark.flaky(reruns=2) # when dask def test_wf_with_state(plugin_dask_opt, tmpdir): - wf = Workflow(name="wf_with_state", input_spec=["x"]) - wf.add(sleep_add_one(name="taska", x=wf.lzin.x)) - wf.add(sleep_add_one(name="taskb", x=wf.taska.lzout.out)) - - wf.split("x", x=[1, 2, 3]) - wf.set_output([("out", wf.taskb.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x): + taska = workflow.add(SleepAddOne(x=x), name="taska") + taskb = workflow.add(SleepAddOne(x=taska.out), name="taskb") + return taskb.out - with Submitter(plugin=plugin_dask_opt) as sub: - sub(wf) + wf = Workflow().split(x=[1, 2, 3]) - res = wf.result() + with Submitter(cache_dir=tmpdir, worker=plugin_dask_opt) as sub: + res = sub(wf) - assert res[0].output.out == 3 - assert res[1].output.out == 4 - assert res[2].output.out == 5 + assert res.outputs.out[0] == 3 + assert res.outputs.out[1] == 4 + assert res.outputs.out[2] == 5 -def test_serial_wf(): +def test_debug_wf(): # Use serial plugin to execute workflow instead of CF - wf = gen_basic_wf() - res = wf(plugin="serial") - assert res.output.out == 9 + wf = BasicWorkflow(x=5) + outputs = wf(worker="debug") + assert outputs.out == 9 @need_slurm def test_slurm_wf(tmpdir): - wf = gen_basic_wf() - wf.cache_dir = tmpdir + wf = BasicWorkflow(x=1) # submit workflow and every task as slurm job - with Submitter("slurm") as sub: - sub(wf) + with Submitter(worker="slurm", cache_dir=tmpdir) as sub: + res = sub(wf) - res = wf.result() - assert res.output.out == 9 + outputs = res.outputs + assert outputs.out == 9 script_dir = tmpdir / "SlurmWorker_scripts" assert script_dir.exists() # ensure each task was executed with slurm @@ -202,13 +203,11 @@ def test_slurm_wf(tmpdir): @need_slurm def test_slurm_wf_cf(tmpdir): # submit entire workflow as single job executing with cf worker - wf = gen_basic_wf() - wf.cache_dir = tmpdir - wf.plugin = "cf" - with Submitter("slurm") as sub: - sub(wf) - res = wf.result() - assert res.output.out == 9 + wf = BasicWorkflow(x=1) + with Submitter(worker="slurm", cache_dir=tmpdir) as sub: + res = sub(wf) + outputs = res.outputs + assert outputs.out == 9 script_dir = tmpdir / "SlurmWorker_scripts" assert script_dir.exists() # ensure only workflow was executed with slurm @@ -220,14 +219,12 @@ def test_slurm_wf_cf(tmpdir): @need_slurm def test_slurm_wf_state(tmpdir): - wf = gen_basic_wf() - wf.split("x", x=[5, 6]) - wf.cache_dir = tmpdir - with Submitter("slurm") as sub: - sub(wf) - res = wf.result() - assert res[0].output.out == 9 - assert res[1].output.out == 10 + wf = BasicWorkflow(x=1).split(x=[5, 6]) + with Submitter(worker="slurm", cache_dir=tmpdir) as sub: + res = sub(wf) + + assert res.outputs.out[0] == 9 + assert res.outputs.out[1] == 10 script_dir = tmpdir / "SlurmWorker_scripts" assert script_dir.exists() sdirs = [sd for sd in script_dir.listdir() if sd.isdir()] @@ -237,16 +234,18 @@ def test_slurm_wf_state(tmpdir): @need_slurm @pytest.mark.flaky(reruns=3) def test_slurm_max_jobs(tmpdir): - wf = Workflow("new_wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.inputs.x = 5 - wf.inputs.y = 10 - wf.add(sleep_add_one(name="taska", x=wf.lzin.x)) - wf.add(sleep_add_one(name="taskb", x=wf.lzin.y)) - wf.add(sleep_add_one(name="taskc", x=wf.taska.lzout.out)) - wf.add(sleep_add_one(name="taskd", x=wf.taskb.lzout.out)) - wf.set_output([("out1", wf.taskc.lzout.out), ("out2", wf.taskd.lzout.out)]) - with 
Submitter("slurm", max_jobs=1) as sub: - sub(wf) + @workflow.define(outputs=["out1", "out2"]) + def Workflow(x, y): + taska = workflow.add(SleepAddOne(x=x)) + taskb = workflow.add(SleepAddOne(x=y)) + taskc = workflow.add(SleepAddOne(x=taska.out)) + taskd = workflow.add(SleepAddOne(x=taskb.out)) + return taskc.out, taskd.out + + wf = Workflow(x=5, y=10) + + with Submitter(worker="slurm", cache_dir=tmpdir, max_jobs=1) as sub: + res = sub(wf) jobids = [] time.sleep(0.5) # allow time for sacct to collect itself @@ -277,14 +276,12 @@ def test_slurm_max_jobs(tmpdir): @need_slurm def test_slurm_args_1(tmpdir): """testing sbatch_args provided to the submitter""" - task = sleep_add_one(x=1) - task.cache_dir = tmpdir + task = SleepAddOne(x=1) # submit workflow and every task as slurm job - with Submitter("slurm", sbatch_args="-N1") as sub: - sub(task) + with Submitter(worker="slurm", cache_dir=tmpdir, sbatch_args="-N1") as sub: + res = sub(task) - res = task.result() - assert res.output.out == 2 + assert res.outputs.out == 2 script_dir = tmpdir / "SlurmWorker_scripts" assert script_dir.exists() @@ -294,15 +291,16 @@ def test_slurm_args_2(tmpdir): """testing sbatch_args provided to the submitter exception should be raised for invalid options """ - task = sleep_add_one(x=1) - task.cache_dir = tmpdir + task = SleepAddOne(x=1) # submit workflow and every task as slurm job with pytest.raises(RuntimeError, match="Error returned from sbatch:"): - with Submitter("slurm", sbatch_args="-N1 --invalid") as sub: + with Submitter( + worker="slurm", cache_dir=tmpdir, sbatch_args="-N1 --invalid" + ) as sub: sub(task) -@mark.task +@python.define def sleep(x, job_name_part): time.sleep(x) import subprocess as sp @@ -319,7 +317,7 @@ def sleep(x, job_name_part): return x -@mark.task +@python.define def cancel(job_name_part): import subprocess as sp @@ -347,26 +345,23 @@ def test_slurm_cancel_rerun_1(tmpdir): The first job should be re-queue and finish without problem. (possibly has to be improved, in theory cancel job might finish before cancel) """ - wf = Workflow( - name="wf", - input_spec=["x", "job_name_cancel", "job_name_resqueue"], - cache_dir=tmpdir, - ) - wf.add(sleep(name="sleep1", x=wf.lzin.x, job_name_part=wf.lzin.job_name_cancel)) - wf.add(cancel(name="cancel1", job_name_part=wf.lzin.job_name_resqueue)) - wf.inputs.x = 10 - wf.inputs.job_name_resqueue = "sleep1" - wf.inputs.job_name_cancel = "cancel1" - - wf.set_output([("out", wf.sleep1.lzout.out), ("canc_out", wf.cancel1.lzout.out)]) - with Submitter("slurm") as sub: - sub(wf) - res = wf.result() - assert res.output.out == 10 + @workflow.define(outputs=["out", "canc_out"]) + def Workflow(x, job_name_cancel, job_name_resqueue): + sleep1 = workflow.add(sleep(x=x, job_name_part=job_name_cancel)) + cancel1 = workflow.add(cancel(job_name_part=job_name_resqueue)) + return sleep1.out, cancel1.out + + wf = Workflow(x=10, job_name_resqueue="sleep1", job_name_cancel="cancel1") + + with Submitter(worker="slurm", cache_dir=tmpdir) as sub: + res = sub(wf) + + outputs = res.outputs + assert outputs.out == 10 # checking if indeed the sleep-task job was cancelled by cancel-task - assert "Terminating" in res.output.canc_out - assert "Invalid" not in res.output.canc_out + assert "Terminating" in outputs.canc_out + assert "Invalid" not in outputs.canc_out script_dir = tmpdir / "SlurmWorker_scripts" assert script_dir.exists() @@ -379,32 +374,32 @@ def test_slurm_cancel_rerun_2(tmpdir): job_id of the first task and cancel it. 
The first job is not able to be rescheduled and the error is returned. """ - wf = Workflow(name="wf", input_spec=["x", "job_name"], cache_dir=tmpdir) - wf.add(sleep(name="sleep2", x=wf.lzin.x)) - wf.add(cancel(name="cancel2", job_name_part=wf.lzin.job_name)) - wf.inputs.x = 10 - wf.inputs.job_name = "sleep2" + @workflow.define(outputs=["out", "canc_out"]) + def Workflow(x, job_name): + sleep2 = workflow.add(sleep(x=x)) + cancel2 = workflow.add(cancel(job_name_part=job_name)) + return sleep2.out, cancel2.out + + wf = Workflow(x=10, job_name="sleep2") - wf.set_output([("out", wf.sleep2.lzout.out), ("canc_out", wf.cancel2.lzout.out)]) with pytest.raises(Exception): - with Submitter("slurm", sbatch_args="--no-requeue") as sub: + with Submitter( + worker="slurm", cache_dir=tmpdir, sbatch_args="--no-requeue" + ) as sub: sub(wf) @need_sge def test_sge_wf(tmpdir): """testing that a basic workflow can be run with the SGEWorker""" - wf = gen_basic_wf() - wf.cache_dir = tmpdir + wf = BasicWorkflow(x=1) # submit workflow and every task as sge job - with Submitter( - "sge", - ) as sub: - sub(wf) + with Submitter(worker="sge", cache_dir=tmpdir) as sub: + res = sub(wf) - res = wf.result() - assert res.output.out == 9 + outputs = res.outputs + assert outputs.out == 9 script_dir = tmpdir / "SGEWorker_scripts" assert script_dir.exists() # ensure each task was executed with sge @@ -412,18 +407,16 @@ @need_sge -def test_sge_wf_cf(tmpdir): +def test_sge_wf_cf(tmp_path): """testing the SGEWorker can submit SGE tasks while the workflow uses the concurrent futures plugin""" # submit entire workflow as single job executing with cf worker - wf = gen_basic_wf() - wf.cache_dir = tmpdir - wf.plugin = "cf" - with Submitter("sge") as sub: - sub(wf) - res = wf.result() - assert res.output.out == 9 - script_dir = tmpdir / "SGEWorker_scripts" + wf = BasicWorkflow(x=1) + with Submitter(worker="sge", cache_dir=tmp_path) as sub: + res = sub(wf) + outputs = res.outputs + assert outputs.out == 9 + script_dir = tmp_path / "SGEWorker_scripts" assert script_dir.exists() # ensure only workflow was executed with sge sdirs = [sd for sd in script_dir.listdir() if sd.isdir()] @@ -435,15 +428,11 @@ @need_sge def test_sge_wf_state(tmpdir): """testing the SGEWorker can be used with a workflow with state""" - wf = gen_basic_wf() - wf.split("x") - wf.inputs.x = [5, 6] - wf.cache_dir = tmpdir - with Submitter("sge") as sub: - sub(wf) - res = wf.result() - assert res[0].output.out == 9 - assert res[1].output.out == 10 + wf = BasicWorkflow().split(x=[5, 6]) + with Submitter(worker="sge", cache_dir=tmpdir) as sub: + res = sub(wf) + assert res.outputs.out[0] == 9 + assert res.outputs.out[1] == 10 script_dir = tmpdir / "SGEWorker_scripts" assert script_dir.exists() sdirs = [sd for sd in script_dir.listdir() if sd.isdir()] @@ -469,12 +458,10 @@ def qacct_output_to_dict(qacct_output): def test_sge_set_threadcount(tmpdir): """testing the number of threads for an SGEWorker task can be set using the input_spec variable sgeThreads""" - wf = gen_basic_wf_with_threadcount() - wf.inputs.x = 5 - wf.cache_dir = tmpdir + wf = BasicWorkflowWithThreadCount(x=5) jobids = [] - with Submitter("sge") as sub: + with Submitter(worker="sge", cache_dir=tmpdir) as sub: sub(wf) jobids = list(sub.worker.jobid_by_task_uid.values()) jobids.sort() @@ -499,13 +486,10 @@ def test_sge_limit_maxthreads(tmpdir): """testing the ability to limit the number of threads used by the SGE at one 
time with the max_threads argument to SGEWorker""" - wf = gen_basic_wf_with_threadcount_concurrent() - wf.inputs.x = [5, 6] - wf.split("x") - wf.cache_dir = tmpdir + wf = BasicWorkflowWithThreadCountConcurrent().split(x=[5, 6]) jobids = [] - with Submitter("sge", max_threads=8) as sub: + with Submitter(worker="sge", max_threads=8, cache_dir=tmpdir) as sub: sub(wf) jobids = list(sub.worker.jobid_by_task_uid.values()) jobids.sort() @@ -543,13 +527,10 @@ def test_sge_limit_maxthreads(tmpdir): def test_sge_no_limit_maxthreads(tmpdir): """testing unlimited threads can be used at once by SGE when max_threads is not set""" - wf = gen_basic_wf_with_threadcount_concurrent() - wf.inputs.x = [5, 6] - wf.split("x") - wf.cache_dir = tmpdir + wf = BasicWorkflowWithThreadCountConcurrent().split(x=[5, 6]) jobids = [] - with Submitter("sge", max_threads=None) as sub: + with Submitter(worker="sge", max_threads=None, cache_dir=tmpdir) as sub: sub(wf) jobids = list(sub.worker.jobid_by_task_uid.values()) jobids.sort() @@ -580,14 +561,14 @@ def test_sge_no_limit_maxthreads(tmpdir): def test_hash_changes_in_task_inputs_file(tmp_path): - @mark.task + @python.define def output_dir_as_input(out_dir: Directory) -> Directory: (out_dir.fspath / "new-file.txt").touch() return out_dir task = output_dir_as_input(out_dir=tmp_path) with pytest.raises(RuntimeError, match="Input field hashes have changed"): - task() + task(cache_dir=tmp_path) def test_hash_changes_in_task_inputs_unstable(tmp_path): @@ -599,83 +580,42 @@ def __bytes_repr__(self, cache) -> ty.Iterator[bytes]: """Random 128-bit bytestring""" yield secrets.token_bytes(16) - @mark.task + @python.define def unstable_input(unstable: Unstable) -> int: return unstable.value task = unstable_input(unstable=Unstable(1)) with pytest.raises(RuntimeError, match="Input field hashes have changed"): - task() + task(cache_dir=tmp_path) def test_hash_changes_in_workflow_inputs(tmp_path): - @mark.task - def output_dir_as_output(out_dir: Path) -> Directory: + @python.define + def OutputDirAsOutput(out_dir: Path) -> Directory: (out_dir / "new-file.txt").touch() return out_dir - wf = Workflow( - name="test_hash_change", input_spec={"in_dir": Directory}, in_dir=tmp_path - ) - wf.add(output_dir_as_output(out_dir=wf.lzin.in_dir, name="task")) - wf.set_output(("out_dir", wf.task.lzout.out)) - with pytest.raises(RuntimeError, match="Input field hashes have changed.*Workflow"): - wf() - - -def test_hash_changes_in_workflow_graph(tmpdir): - class X: - """Dummy class with unstable hash (i.e. which isn't altered in a node in which - it is an input)""" + @workflow.define(outputs=["out_dir"]) + def Workflow(in_dir: Directory): + task = workflow.add(OutputDirAsOutput(out_dir=in_dir), name="task") + return task.out - value = 1 + in_dir = tmp_path / "in_dir" + in_dir.mkdir() + cache_dir = tmp_path / "cache_dir" + cache_dir.mkdir() - def __bytes_repr__(self, cache): - """Bytes representation from class attribute, which will be changed be - 'alter_x" node. 
+ wf = Workflow(in_dir=in_dir) + with pytest.raises(RuntimeError, match="Input field hashes have changed.*"): + wf(cache_dir=cache_dir) - NB: this is a contrived example where the bytes_repr implementation returns - a bytes representation of a class attribute in order to trigger the exception, - hopefully cases like this will be very rare""" - yield bytes(self.value) - @mark.task - @mark.annotate({"return": {"x": X, "y": int}}) - def identity(x: X) -> ty.Tuple[X, int]: - return x, 99 - - @mark.task - def alter_x(y): - X.value = 2 - return y - - @mark.task - def to_tuple(x, y): - return (x, y) - - wf = Workflow(name="wf_with_blocked_tasks", input_spec=["x", "y"]) - wf.add(identity(name="taska", x=wf.lzin.x)) - wf.add(alter_x(name="taskb", y=wf.taska.lzout.y)) - wf.add(to_tuple(name="taskc", x=wf.taska.lzout.x, y=wf.taskb.lzout.out)) - wf.set_output([("out", wf.taskc.lzout.out)]) - - wf.inputs.x = X() - - wf.cache_dir = tmpdir - - with pytest.raises( - RuntimeError, match="Graph of 'wf_with_blocked_tasks' workflow is not empty" - ): - with Submitter("cf") as sub: - result = sub(wf) - - -@mark.task +@python.define def to_tuple(x, y): return (x, y) -class BYOAddVarWorker(SerialWorker): +class BYOAddVarWorker(DebugWorker): """A dummy worker that adds 1 to the output of the task""" plugin_name = "byo_add_env_var" @@ -684,42 +624,44 @@ def __init__(self, add_var, **kwargs): super().__init__(**kwargs) self.add_var = add_var - async def exec_serial(self, runnable, rerun=False, environment=None): - if isinstance(runnable, TaskBase): - with patch.dict(os.environ, {"BYO_ADD_VAR": str(self.add_var)}): - result = runnable._run(rerun, environment=environment) - return result - else: # it could be tuple that includes pickle files with tasks and inputs - return super().exec_serial(runnable, rerun, environment) + def run( + self, + task: "Task", + rerun: bool = False, + ) -> "Result": + with patch.dict(os.environ, {"BYO_ADD_VAR": str(self.add_var)}): + return super().run(task, rerun) -@mark.task -def add_env_var_task(x: int) -> int: +@python.define +def AddEnvVarTask(x: int) -> int: return x + int(os.environ.get("BYO_ADD_VAR", 0)) -def test_byo_worker(): +def test_byo_worker(tmp_path): + + task1 = AddEnvVarTask(x=1) - task1 = add_env_var_task(x=1) + with Submitter(worker=BYOAddVarWorker, add_var=10, cache_dir=tmp_path) as sub: + assert sub.worker_name == "byo_add_env_var" + result = sub(task1) - with Submitter(plugin=BYOAddVarWorker, add_var=10) as sub: - assert sub.plugin == "byo_add_env_var" - result = task1(submitter=sub) + assert result.outputs.out == 11 - assert result.output.out == 11 + task2 = AddEnvVarTask(x=2) - task2 = add_env_var_task(x=2) + new_cache_dir = tmp_path / "new" - with Submitter(plugin="serial") as sub: - result = task2(submitter=sub) + with Submitter(worker="debug", cache_dir=new_cache_dir) as sub: + result = sub(task2) - assert result.output.out == 2 + assert result.outputs.out == 2 def test_bad_builtin_worker(): with pytest.raises(NotImplementedError, match="No worker for 'bad-worker' plugin"): - Submitter(plugin="bad-worker") + Submitter(worker="bad-worker") def test_bad_byo_worker(): @@ -730,4 +672,4 @@ class BadWorker: with pytest.raises( ValueError, match="Worker class must have a 'plugin_name' str attribute" ): - Submitter(plugin=BadWorker) + Submitter(worker=BadWorker) diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 0d666574e3..f15691b82a 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -1,26 +1,27 @@ 
import typing as ty -import os, sys -import attr +import os +import sys +import attrs +import shutil import pytest import cloudpickle as cp from pathlib import Path import json import glob as glob -from ... import mark -from ..core import Workflow -from ..task import AuditFlag, ShellCommandTask -from ...utils.messenger import FileMessenger, PrintMessenger, collect_messages -from .utils import gen_basic_wf -from ..specs import ( +from pydra.design import python, shell, workflow +from pydra.utils.messenger import FileMessenger, PrintMessenger, collect_messages +from ..task import AuditFlag +from pydra.engine.specs import argstr_formatting, ShellDef, ShellOutputs, TaskHooks +from pydra.engine.helpers import list_fields, print_help +from pydra.engine.submitter import Submitter +from pydra.engine.core import Task +from pydra.utils import default_run_cache_dir +from pydra.utils.typing import ( MultiInputObj, MultiOutputObj, - SpecInfo, - FunctionSpec, - BaseSpec, - ShellSpec, - File, ) -from ...utils.hash import hash_function +from fileformats.generic import File +from pydra.utils.hash import hash_function no_win = pytest.mark.skipif( @@ -29,145 +30,128 @@ ) -@mark.task -def funaddtwo(a): +@python.define +def FunAddTwo(a): return a + 2 def test_output(): - nn = funaddtwo(a=3) - res = nn._run() - assert res.output.out == 5 - - -def test_name_conflict(): - """raise error if task name conflicts with a class attribute or method""" - with pytest.raises(ValueError) as excinfo1: - funaddtwo(name="split", a=3) - assert "Cannot use names of attributes or methods" in str(excinfo1.value) - with pytest.raises(ValueError) as excinfo2: - funaddtwo(name="checksum", a=3) - assert "Cannot use names of attributes or methods" in str(excinfo2.value) + nn = FunAddTwo(a=3) + outputs = nn() + assert outputs.out == 5 def test_numpy(): """checking if mark.task works for numpy functions""" np = pytest.importorskip("numpy") - fft = mark.annotate({"a": np.ndarray, "return": np.ndarray})(np.fft.fft) - fft = mark.task(fft)() + FFT = python.define(inputs={"a": np.ndarray}, outputs={"out": np.ndarray})( + np.fft.fft + ) + arr = np.array([[1, 10], [2, 20]]) - fft.inputs.a = arr - res = fft() - assert np.allclose(np.fft.fft(arr), res.output.out) + fft = FFT(a=arr) + outputs = fft() + assert np.allclose(np.fft.fft(arr), outputs.out) @pytest.mark.xfail(reason="cp.dumps(func) depends on the system/setup, TODO!!") def test_checksum(): - nn = funaddtwo(a=3) + nn = FunAddTwo(a=3) assert ( - nn.checksum - == "FunctionTask_abb4e7cc03b13d0e73884b87d142ed5deae6a312275187a9d8df54407317d7d3" + nn._checksum + == "PythonTask_abb4e7cc03b13d0e73884b87d142ed5deae6a312275187a9d8df54407317d7d3" ) def test_annotated_func(): - @mark.task - def testfunc( - a: int, b: float = 0.1 - ) -> ty.NamedTuple("Output", [("out_out", float)]): + @python.define(outputs=["out_out"]) + def TestFunc(a: int, b: float = 0.1) -> float: return a + b - funky = testfunc(a=1) - assert hasattr(funky.inputs, "a") - assert hasattr(funky.inputs, "b") - assert hasattr(funky.inputs, "_func") - assert getattr(funky.inputs, "a") == 1 - assert getattr(funky.inputs, "b") == 0.1 - assert getattr(funky.inputs, "_func") is not None - assert set(funky.output_names) == {"out_out"} - # assert funky.inputs.hash == '17772c3aec9540a8dd3e187eecd2301a09c9a25c6e371ddd86e31e3a1ecfeefa' - assert funky.__class__.__name__ + "_" + funky.inputs.hash == funky.checksum - - result = funky() - assert hasattr(result, "output") - assert hasattr(result.output, "out_out") - assert result.output.out_out == 
1.1 - - assert os.path.exists(funky.cache_dir / funky.checksum / "_result.pklz") - funky.result() # should not recompute - funky.inputs.a = 2 - # assert funky.checksum == '537d25885fd2ea5662b7701ba02c132c52a9078a3a2d56aa903a777ea90e5536' - assert funky.result() is None - funky() - result = funky.result() - assert result.output.out_out == 2.1 - - help = funky.help(returnhelp=True) + funky = TestFunc(a=1) + assert hasattr(funky, "a") + assert hasattr(funky, "b") + assert hasattr(funky, "function") + assert getattr(funky, "a") == 1 + assert getattr(funky, "b") == 0.1 + assert getattr(funky, "function") is not None + assert set(f.name for f in list_fields(funky.Outputs)) == {"out_out"} + + outputs = funky() + assert hasattr(outputs, "out_out") + assert outputs.out_out == 1.1 + + assert os.path.exists( + default_run_cache_dir / f"python-{funky._hash}" / "_result.pklz" + ) + funky() # should not recompute + funky.a = 2 + outputs = funky() + assert outputs.out_out == 2.1 + + help = print_help(funky) assert help == [ - "Help for FunctionTask", + "Help for TestFunc", "Input Parameters:", "- a: int", "- b: float (default: 0.1)", - "- _func: bytes", "Output Parameters:", "- out_out: float", ] def test_annotated_func_dictreturn(): - """Test mapping from returned dictionary to output spec.""" + """Test mapping from returned dictionary to output definition.""" - @mark.task - @mark.annotate({"return": {"sum": int, "mul": ty.Optional[int]}}) - def testfunc(a: int, b: int): + @python.define(outputs={"sum": int, "mul": ty.Optional[int]}) + def TestFunc(a: int, b: int): return dict(sum=a + b, diff=a - b) - task = testfunc(a=2, b=3) - result = task() + task = TestFunc(a=2, b=3) + outputs = task() # Part of the annotation and returned, should be exposed to output. - assert result.output.sum == 5 + assert outputs.sum == 5 # Part of the annotation but not returned, should be coalesced to None - assert result.output.mul is None + assert outputs.mul is None # Not part of the annotation, should be discarded. 
- assert not hasattr(result.output, "diff") + assert not hasattr(outputs, "diff") def test_annotated_func_multreturn(): """the function has two elements in the return statement""" - @mark.task - def testfunc( + @python.define(outputs={"fractional": float, "integer": int}) + def TestFunc( a: float, - ) -> ty.NamedTuple("Output", [("fractional", float), ("integer", int)]): + ): import math return math.modf(a)[0], int(math.modf(a)[1]) - funky = testfunc(a=3.5) - assert hasattr(funky.inputs, "a") - assert hasattr(funky.inputs, "_func") - assert getattr(funky.inputs, "a") == 3.5 - assert getattr(funky.inputs, "_func") is not None - assert set(funky.output_names) == {"fractional", "integer"} - assert funky.__class__.__name__ + "_" + funky.inputs.hash == funky.checksum - - result = funky() - assert os.path.exists(funky.cache_dir / funky.checksum / "_result.pklz") - assert hasattr(result, "output") - assert hasattr(result.output, "fractional") - assert result.output.fractional == 0.5 - assert hasattr(result.output, "integer") - assert result.output.integer == 3 - - help = funky.help(returnhelp=True) + funky = TestFunc(a=3.5) + assert hasattr(funky, "a") + assert hasattr(funky, "function") + assert getattr(funky, "a") == 3.5 + assert getattr(funky, "function") is not None + assert set(f.name for f in list_fields(funky.Outputs)) == {"fractional", "integer"} + + outputs = funky() + assert os.path.exists( + default_run_cache_dir / f"python-{funky._hash}" / "_result.pklz" + ) + assert hasattr(outputs, "fractional") + assert outputs.fractional == 0.5 + assert hasattr(outputs, "integer") + assert outputs.integer == 3 + + help = print_help(funky) assert help == [ - "Help for FunctionTask", + "Help for TestFunc", "Input Parameters:", "- a: float", - "- _func: bytes", "Output Parameters:", "- fractional: float", "- integer: int", @@ -177,57 +161,57 @@ def testfunc( def test_annotated_input_func_1(): """the function with annotated input (float)""" - @mark.task - def testfunc(a: float): + @python.define + def TestFunc(a: float): return a - funky = testfunc(a=3.5) - assert getattr(funky.inputs, "a") == 3.5 + funky = TestFunc(a=3.5) + assert getattr(funky, "a") == 3.5 def test_annotated_input_func_2(): """the function with annotated input (int, but float provided)""" - @mark.task - def testfunc(a: int): + @python.define + def TestFunc(a: int): return a with pytest.raises(TypeError): - testfunc(a=3.5) + TestFunc(a=3.5) def test_annotated_input_func_2a(): """the function with annotated input (int, but float provided)""" - @mark.task - def testfunc(a: int): + @python.define + def TestFunc(a: int): return a - funky = testfunc() + funky = TestFunc() with pytest.raises(TypeError): - funky.inputs.a = 3.5 + funky.a = 3.5 def test_annotated_input_func_3(): """the function with annotated input (list)""" - @mark.task - def testfunc(a: list): + @python.define + def TestFunc(a: list): return sum(a) - funky = testfunc(a=[1, 3.5]) - assert getattr(funky.inputs, "a") == [1, 3.5] + funky = TestFunc(a=[1, 3.5]) + assert getattr(funky, "a") == [1, 3.5] def test_annotated_input_func_3a(): """the function with annotated input (list of floats)""" - @mark.task - def testfunc(a: ty.List[float]): + @python.define + def TestFunc(a: ty.List[float]): return sum(a) - funky = testfunc(a=[1.0, 3.5]) - assert getattr(funky.inputs, "a") == [1.0, 3.5] + funky = TestFunc(a=[1.0, 3.5]) + assert getattr(funky, "a") == [1.0, 3.5] def test_annotated_input_func_3b(): @@ -235,12 +219,12 @@ def test_annotated_input_func_3b(): (list of floats - int and 
float provided, should be fine) """ - @mark.task - def testfunc(a: ty.List[float]): + @python.define + def TestFunc(a: ty.List[float]): return sum(a) - funky = testfunc(a=[1, 3.5]) - assert getattr(funky.inputs, "a") == [1, 3.5] + funky = TestFunc(a=[1, 3.5]) + assert getattr(funky, "a") == [1, 3.5] def test_annotated_input_func_3c_excep(): @@ -248,45 +232,45 @@ def test_annotated_input_func_3c_excep(): (list of ints - int and float provided, should raise an error) """ - @mark.task - def testfunc(a: ty.List[int]): + @python.define + def TestFunc(a: ty.List[int]): return sum(a) with pytest.raises(TypeError): - testfunc(a=[1, 3.5]) + TestFunc(a=[1, 3.5]) def test_annotated_input_func_4(): """the function with annotated input (dictionary)""" - @mark.task - def testfunc(a: dict): + @python.define + def TestFunc(a: dict): return sum(a.values()) - funky = testfunc(a={"el1": 1, "el2": 3.5}) - assert getattr(funky.inputs, "a") == {"el1": 1, "el2": 3.5} + funky = TestFunc(a={"el1": 1, "el2": 3.5}) + assert getattr(funky, "a") == {"el1": 1, "el2": 3.5} def test_annotated_input_func_4a(): """the function with annotated input (dictionary of floats)""" - @mark.task - def testfunc(a: ty.Dict[str, float]): + @python.define + def TestFunc(a: ty.Dict[str, float]): return sum(a.values()) - funky = testfunc(a={"el1": 1, "el2": 3.5}) - assert getattr(funky.inputs, "a") == {"el1": 1, "el2": 3.5} + funky = TestFunc(a={"el1": 1, "el2": 3.5}) + assert getattr(funky, "a") == {"el1": 1, "el2": 3.5} def test_annotated_input_func_4b_excep(): """the function with annotated input (dictionary of ints, but float provided)""" - @mark.task - def testfunc(a: ty.Dict[str, int]): + @python.define + def TestFunc(a: ty.Dict[str, int]): return sum(a.values()) with pytest.raises(TypeError): - testfunc(a={"el1": 1, "el2": 3.5}) + TestFunc(a={"el1": 1, "el2": 3.5}) def test_annotated_input_func_5(): @@ -295,12 +279,12 @@ def test_annotated_input_func_5(): so no error for 3.5 """ - @mark.task - def testfunc(a: ty.Dict[str, ty.List]): + @python.define + def TestFunc(a: ty.Dict[str, ty.List]): return sum(a["el1"]) - funky = testfunc(a={"el1": [1, 3.5]}) - assert getattr(funky.inputs, "a") == {"el1": [1, 3.5]} + funky = TestFunc(a={"el1": [1, 3.5]}) + assert getattr(funky, "a") == {"el1": [1, 3.5]} def test_annotated_input_func_5a_except(): @@ -308,12 +292,12 @@ def test_annotated_input_func_5a_except(): list is provided as a dict value (instead a dict), so error is raised """ - @mark.task - def testfunc(a: ty.Dict[str, ty.Dict[str, float]]): + @python.define + def TestFunc(a: ty.Dict[str, ty.Dict[str, float]]): return sum(a["el1"]) with pytest.raises(TypeError): - testfunc(a={"el1": [1, 3.5]}) + TestFunc(a={"el1": [1, 3.5]}) def test_annotated_input_func_6(): @@ -321,12 +305,12 @@ def test_annotated_input_func_6(): the validator should unpack values from the Union """ - @mark.task - def testfunc(a: ty.Dict[str, ty.Union[float, int]]): + @python.define + def TestFunc(a: ty.Dict[str, ty.Union[float, int]]): return sum(a["el1"]) - funky = testfunc(a={"el1": 1, "el2": 3.5}) - assert getattr(funky.inputs, "a") == {"el1": 1, "el2": 3.5} + funky = TestFunc(a={"el1": 1, "el2": 3.5}) + assert getattr(funky, "a") == {"el1": 1, "el2": 3.5} def test_annotated_input_func_6a_excep(): @@ -334,12 +318,12 @@ def test_annotated_input_func_6a_excep(): the validator should unpack values from the Union and raise an error for 3.5 """ - @mark.task - def testfunc(a: ty.Dict[str, ty.Union[str, int]]): + @python.define + def TestFunc(a: ty.Dict[str, 
ty.Union[str, int]]): return sum(a["el1"]) with pytest.raises(TypeError): - testfunc(a={"el1": 1, "el2": 3.5}) + TestFunc(a={"el1": 1, "el2": 3.5}) def test_annotated_input_func_7(): @@ -348,12 +332,12 @@ def test_annotated_input_func_7(): it should work, the validator tries to guess if this is a field with a splitter """ - @mark.task - def testfunc(a: float): + @python.define + def TestFunc(a: float): return a - funky = testfunc().split("a", a=[3.5, 2.1]) - assert getattr(funky.inputs, "a") == [3.5, 2.1] + funky = TestFunc().split("a", a=[3.5, 2.1]) + assert getattr(funky, "a") == [3.5, 2.1] def test_annotated_input_func_7a_excep(): @@ -361,12 +345,12 @@ def test_annotated_input_func_7a_excep(): list of float provided - should raise an error (list of int would be fine) """ - @mark.task - def testfunc(a: int): + @python.define + def TestFunc(a: int): return a with pytest.raises(TypeError): - testfunc(a=[3.5, 2.1]).split("a") + TestFunc(a=[3.5, 2.1]).split("a") def test_annotated_input_func_8(): @@ -374,14 +358,14 @@ def test_annotated_input_func_8(): a single value is provided and should be converted to a list """ - @mark.task - def testfunc(a: MultiInputObj): + @python.define + def TestFunc(a: MultiInputObj): return len(a) - funky = testfunc(a=3.5) - assert getattr(funky.inputs, "a") == [3.5] - res = funky() - assert res.output.out == 1 + funky = TestFunc(a=3.5) + assert getattr(funky, "a") == [3.5] + outputs = funky() + assert outputs.out == 1 def test_annotated_input_func_8a(): @@ -389,14 +373,14 @@ def test_annotated_input_func_8a(): a 1-el list is provided so shouldn't be changed """ - @mark.task - def testfunc(a: MultiInputObj): + @python.define + def TestFunc(a: MultiInputObj): return len(a) - funky = testfunc(a=[3.5]) - assert getattr(funky.inputs, "a") == [3.5] - res = funky() - assert res.output.out == 1 + funky = TestFunc(a=[3.5]) + assert getattr(funky, "a") == [3.5] + outputs = funky() + assert outputs.out == 1 def test_annotated_input_func_8b(): @@ -405,116 +389,114 @@ def test_annotated_input_func_8b(): (input should still be converted to a list) """ - @mark.task - def testfunc(a: MultiInputObj): + @python.define + def TestFunc(a: MultiInputObj): return len(a) - funky = testfunc() + funky = TestFunc() # setting a after init - funky.inputs.a = 3.5 - assert getattr(funky.inputs, "a") == [3.5] - res = funky() - assert res.output.out == 1 + funky.a = 3.5 + assert getattr(funky, "a") == [3.5] + outputs = funky() + assert outputs.out == 1 def test_annotated_func_multreturn_exception(): """function has two elements in the return statement, - but three element provided in the spec - should raise an error + but three element provided in the definition - should raise an error """ - @mark.task - def testfunc( + @python.define(outputs={"fractional": float, "integer": int, "who_knows": int}) + def TestFunc( a: float, - ) -> ty.NamedTuple( - "Output", [("fractional", float), ("integer", int), ("who_knows", int)] ): import math return math.modf(a) - funky = testfunc(a=3.5) + funky = TestFunc(a=3.5) with pytest.raises(Exception) as excinfo: funky() assert "expected 3 elements" in str(excinfo.value) -def test_halfannotated_func(): - @mark.task - def testfunc(a, b) -> int: +def test_halfannotated_func(tmp_path): + + cache_dir = tmp_path / "cache" + cache_dir.mkdir() + + @python.define + def TestFunc(a, b) -> int: return a + b - funky = testfunc(a=10, b=20) - assert hasattr(funky.inputs, "a") - assert hasattr(funky.inputs, "b") - assert hasattr(funky.inputs, "_func") - assert 
getattr(funky.inputs, "a") == 10 - assert getattr(funky.inputs, "b") == 20 - assert getattr(funky.inputs, "_func") is not None - assert set(funky.output_names) == {"out"} - assert funky.__class__.__name__ + "_" + funky.inputs.hash == funky.checksum - - result = funky() - assert hasattr(result, "output") - assert hasattr(result.output, "out") - assert result.output.out == 30 - - assert os.path.exists(funky.cache_dir / funky.checksum / "_result.pklz") - - funky.result() # should not recompute - funky.inputs.a = 11 - assert funky.result() is None - funky() - result = funky.result() - assert result.output.out == 31 - help = funky.help(returnhelp=True) + funky = TestFunc(a=10, b=20) + assert hasattr(funky, "a") + assert hasattr(funky, "b") + assert hasattr(funky, "function") + assert getattr(funky, "a") == 10 + assert getattr(funky, "b") == 20 + assert getattr(funky, "function") is not None + assert set(f.name for f in list_fields(funky.Outputs)) == {"out"} + + outputs = funky(cache_dir=cache_dir) + assert hasattr(outputs, "out") + assert outputs.out == 30 + + assert Path(cache_dir / f"python-{funky._hash}" / "_result.pklz").exists() + + funky(cache_dir=cache_dir) # should not recompute + funky.a = 11 + assert not Path(cache_dir / f"python-{funky._hash}").exists() + outputs = funky(cache_dir=cache_dir) + assert outputs.out == 31 + help = print_help(funky) assert help == [ - "Help for FunctionTask", + "Help for TestFunc", "Input Parameters:", - "- a: _empty", - "- b: _empty", - "- _func: bytes", + "- a: Any", + "- b: Any", "Output Parameters:", "- out: int", ] -def test_halfannotated_func_multreturn(): - @mark.task - def testfunc(a, b) -> (int, int): +def test_halfannotated_func_multreturn(tmp_path): + + cache_dir = tmp_path / "cache" + cache_dir.mkdir() + + @python.define(outputs=["out1", "out2"]) + def TestFunc(a, b) -> tuple[int, int]: return a + 1, b + 1 - funky = testfunc(a=10, b=20) - assert hasattr(funky.inputs, "a") - assert hasattr(funky.inputs, "b") - assert hasattr(funky.inputs, "_func") - assert getattr(funky.inputs, "a") == 10 - assert getattr(funky.inputs, "b") == 20 - assert getattr(funky.inputs, "_func") is not None - assert set(funky.output_names) == {"out1", "out2"} - assert funky.__class__.__name__ + "_" + funky.inputs.hash == funky.checksum - - result = funky() - assert hasattr(result, "output") - assert hasattr(result.output, "out1") - assert result.output.out1 == 11 - - assert os.path.exists(funky.cache_dir / funky.checksum / "_result.pklz") - - funky.result() # should not recompute - funky.inputs.a = 11 - assert funky.result() is None - funky() - result = funky.result() - assert result.output.out1 == 12 - help = funky.help(returnhelp=True) + funky = TestFunc(a=10, b=20) + assert hasattr(funky, "a") + assert hasattr(funky, "b") + assert hasattr(funky, "function") + assert getattr(funky, "a") == 10 + assert getattr(funky, "b") == 20 + assert getattr(funky, "function") is not None + assert set(f.name for f in list_fields(funky.Outputs)) == {"out1", "out2"} + + outputs = funky(cache_dir=cache_dir) + assert hasattr(outputs, "out1") + assert outputs.out1 == 11 + + assert Path(cache_dir / f"python-{funky._hash}" / "_result.pklz").exists() + + funky(cache_dir=cache_dir) # should not recompute + funky.a = 11 + assert not Path(cache_dir / f"python-{funky._hash}" / "_result.pklz").exists() + outputs = funky(cache_dir=cache_dir) + assert outputs.out1 == 12 + help = print_help(funky) assert help == [ - "Help for FunctionTask", + "Help for TestFunc", "Input Parameters:", - "- a: 
_empty", - "- b: _empty", - "- _func: bytes", + "- a: Any", + "- b: Any", "Output Parameters:", "- out1: int", "- out2: int", @@ -522,44 +504,43 @@ def testfunc(a, b) -> (int, int): def test_notannotated_func(): - @mark.task - def no_annots(c, d): + @python.define + def NoAnnots(c, d): return c + d - natask = no_annots(c=17, d=3.2) - assert hasattr(natask.inputs, "c") - assert hasattr(natask.inputs, "d") - assert hasattr(natask.inputs, "_func") + no_annots = NoAnnots(c=17, d=3.2) + assert hasattr(no_annots, "c") + assert hasattr(no_annots, "d") + assert hasattr(no_annots, "function") - result = natask._run() - assert hasattr(result, "output") - assert hasattr(result.output, "out") - assert result.output.out == 20.2 + outputs = no_annots() + assert hasattr(outputs, "out") + assert outputs.out == 20.2 def test_notannotated_func_returnlist(): - @mark.task - def no_annots(c, d): + @python.define + def NoAnnots(c, d): return [c, d] - natask = no_annots(c=17, d=3.2) - result = natask._run() - assert hasattr(result.output, "out") - assert result.output.out == [17, 3.2] + no_annots = NoAnnots(c=17, d=3.2) + outputs = no_annots() + assert hasattr(outputs, "out") + assert outputs.out == [17, 3.2] def test_halfannotated_func_multrun_returnlist(): - @mark.task - def no_annots(c, d) -> (list, float): + @python.define(outputs=["out1", "out2"]) + def NoAnnots(c, d) -> tuple[list, float]: return [c, d], c + d - natask = no_annots(c=17, d=3.2) - result = natask._run() + no_annots = NoAnnots(c=17, d=3.2) + outputs = no_annots() - assert hasattr(result.output, "out1") - assert hasattr(result.output, "out2") - assert result.output.out1 == [17, 3.2] - assert result.output.out2 == 20.2 + assert hasattr(outputs, "out1") + assert hasattr(outputs, "out2") + assert outputs.out1 == [17, 3.2] + assert outputs.out2 == 20.2 def test_notannotated_func_multreturn(): @@ -567,36 +548,29 @@ def test_notannotated_func_multreturn(): all elements should be returned as a tuple and set to "out" """ - @mark.task - def no_annots(c, d): + @python.define + def NoAnnots(c, d): return c + d, c - d - natask = no_annots(c=17, d=3.2) - assert hasattr(natask.inputs, "c") - assert hasattr(natask.inputs, "d") - assert hasattr(natask.inputs, "_func") + no_annots = NoAnnots(c=17, d=3.2) + assert hasattr(no_annots, "c") + assert hasattr(no_annots, "d") + assert hasattr(no_annots, "function") - result = natask._run() - assert hasattr(result, "output") - assert hasattr(result.output, "out") - assert result.output.out == (20.2, 13.8) + outputs = no_annots() + assert hasattr(outputs, "out") + assert outputs.out == (20.2, 13.8) def test_input_spec_func_1(): """the function w/o annotated, but input_spec is used""" - @mark.task - def testfunc(a): + @python.define(inputs={"a": python.arg(type=float, help="input a")}) + def TestFunc(a): return a - my_input_spec = SpecInfo( - name="Input", - fields=[("a", attr.ib(type=float, metadata={"help_string": "input a"}))], - bases=(FunctionSpec,), - ) - - funky = testfunc(a=3.5, input_spec=my_input_spec) - assert getattr(funky.inputs, "a") == 3.5 + funky = TestFunc(a=3.5) + assert funky.a == 3.5 def test_input_spec_func_1a_except(): @@ -604,17 +578,12 @@ def test_input_spec_func_1a_except(): a TypeError is raised (float is provided instead of int) """ - @mark.task - def testfunc(a): + @python.define(inputs={"a": python.arg(type=int, help="input a")}) + def TestFunc(a): return a - my_input_spec = SpecInfo( - name="Input", - fields=[("a", attr.ib(type=int, metadata={"help_string": "input a"}))], - 
bases=(FunctionSpec,), - ) with pytest.raises(TypeError): - testfunc(a=3.5, input_spec=my_input_spec) + TestFunc(a=3.5) def test_input_spec_func_1b_except(): @@ -622,22 +591,13 @@ def test_input_spec_func_1b_except(): metadata checks raise an error """ - @mark.task - def testfunc(a): - return a + with pytest.raises( + TypeError, match="got an unexpected keyword argument 'position'" + ): - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "a", - attr.ib(type=float, metadata={"position": 1, "help_string": "input a"}), - ) - ], - bases=(FunctionSpec,), - ) - with pytest.raises(AttributeError, match="only these keys are supported"): - testfunc(a=3.5, input_spec=my_input_spec) + @python.define(inputs={"a": python.arg(type=float, position=1, help="input a")}) + def TestFunc(a): + return a def test_input_spec_func_1d_except(): @@ -645,13 +605,12 @@ def test_input_spec_func_1d_except(): input_spec doesn't contain 'a' input, an error is raised """ - @mark.task - def testfunc(a): + @python.define + def TestFunc(a): return a - my_input_spec = SpecInfo(name="Input", fields=[], bases=(FunctionSpec,)) - funky = testfunc(a=3.5, input_spec=my_input_spec) - with pytest.raises(TypeError, match="missing 1 required positional argument"): + funky = TestFunc() + with pytest.raises(ValueError, match="Mandatory field 'a' is not set"): funky() @@ -660,18 +619,12 @@ def test_input_spec_func_2(): input_spec changes the type of the input (so error is not raised) """ - @mark.task - def testfunc(a: int): + @python.define(inputs={"a": python.arg(type=float, help="input a")}) + def TestFunc(a: int): return a - my_input_spec = SpecInfo( - name="Input", - fields=[("a", attr.ib(type=float, metadata={"help_string": "input a"}))], - bases=(FunctionSpec,), - ) - - funky = testfunc(a=3.5, input_spec=my_input_spec) - assert getattr(funky.inputs, "a") == 3.5 + funky = TestFunc(a=3.5) + assert funky.a == 3.5 def test_input_spec_func_2a(): @@ -680,18 +633,12 @@ def test_input_spec_func_2a(): using the shorter syntax """ - @mark.task - def testfunc(a: int): + @python.define(inputs={"a": python.arg(type=float, help="input a")}) + def TestFunc(a: int): return a - my_input_spec = SpecInfo( - name="Input", - fields=[("a", float, {"help_string": "input a"})], - bases=(FunctionSpec,), - ) - - funky = testfunc(a=3.5, input_spec=my_input_spec) - assert getattr(funky.inputs, "a") == 3.5 + funky = TestFunc(a=3.5) + assert funky.a == 3.5 def test_input_spec_func_3(): @@ -699,26 +646,20 @@ def test_input_spec_func_3(): additional keys (allowed_values) are used in metadata """ - @mark.task - def testfunc(a): - return a - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "a", - attr.ib( - type=int, - metadata={"help_string": "input a", "allowed_values": [0, 1, 2]}, - ), + @python.define( + inputs={ + "a": python.arg( + type=int, + help="input a", + allowed_values=[0, 1, 2], ) - ], - bases=(FunctionSpec,), + } ) + def TestFunc(a): + return a - funky = testfunc(a=2, input_spec=my_input_spec) - assert getattr(funky.inputs, "a") == 2 + funky = TestFunc(a=2) + assert funky.a == 2 def test_input_spec_func_3a_except(): @@ -726,26 +667,20 @@ def test_input_spec_func_3a_except(): allowed_values is used in metadata and the ValueError is raised """ - @mark.task - def testfunc(a): - return a - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "a", - attr.ib( - type=int, - metadata={"help_string": "input a", "allowed_values": [0, 1, 2]}, - ), + @python.define( + inputs={ + "a": python.arg( + type=int, + help="input a", + 
allowed_values=[0, 1, 2], ) - ], - bases=(FunctionSpec,), + } ) + def TestFunc(a): + return a with pytest.raises(ValueError, match="value of a has to be"): - testfunc(a=3, input_spec=my_input_spec) + TestFunc(a=3) def test_input_spec_func_4(): @@ -753,31 +688,17 @@ def test_input_spec_func_4(): but b is set as mandatory in the input_spec, so error is raised if not provided """ - @mark.task - def testfunc(a, b=1): - return a + b - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "a", - attr.ib( - type=int, metadata={"help_string": "input a", "mandatory": True} - ), - ), - ( - "b", - attr.ib( - type=int, metadata={"help_string": "input b", "mandatory": True} - ), - ), - ], - bases=(FunctionSpec,), + @python.define( + inputs={ + "a": python.arg(type=int, help="input a"), + "b": python.arg(type=int, help="input b"), + } ) + def TestFunc(a, b): + return a + b - funky = testfunc(a=2, input_spec=my_input_spec) - with pytest.raises(Exception, match="b is mandatory"): + funky = TestFunc(a=2) + with pytest.raises(Exception, match="Mandatory field 'b' is not set"): funky() @@ -786,68 +707,45 @@ def test_input_spec_func_4a(): has a different default value, so value from the function is overwritten """ - @mark.task - def testfunc(a, b=1): - return a + b - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "a", - attr.ib( - type=int, metadata={"help_string": "input a", "mandatory": True} - ), - ), - ("b", attr.ib(type=int, default=10, metadata={"help_string": "input b"})), - ], - bases=(FunctionSpec,), + @python.define( + inputs={ + "a": python.arg(type=int, help="input a"), + "b": python.arg(type=int, help="input b", default=10), + } ) + def TestFunc(a, b=1): + return a + b - funky = testfunc(a=2, input_spec=my_input_spec) - res = funky() - assert res.output.out == 12 + funky = TestFunc(a=2) + outputs = funky() + assert outputs.out == 12 def test_input_spec_func_5(): - """the FunctionTask with input_spec, a input has MultiInputObj type + """the PythonTask with input_spec, a input has MultiInputObj type a single value is provided and should be converted to a list """ - @mark.task - def testfunc(a): + @python.define(inputs={"a": python.arg(type=MultiInputObj, help="input a")}) + def TestFunc(a): return len(a) - my_input_spec = SpecInfo( - name="Input", - fields=[ - ("a", attr.ib(type=MultiInputObj, metadata={"help_string": "input a"})) - ], - bases=(FunctionSpec,), - ) - - funky = testfunc(a=3.5, input_spec=my_input_spec) - assert getattr(funky.inputs, "a") == MultiInputObj([3.5]) - res = funky() - assert res.output.out == 1 + funky = TestFunc(a=3.5) + assert funky.a == MultiInputObj([3.5]) + outputs = funky() + assert outputs.out == 1 def test_output_spec_func_1(): """the function w/o annotated, but output_spec is used""" - @mark.task - def testfunc(a): + @python.define(outputs={"out1": python.out(type=float, help="output")}) + def TestFunc(a): return a - my_output_spec = SpecInfo( - name="Output", - fields=[("out1", attr.ib(type=float, metadata={"help_string": "output"}))], - bases=(BaseSpec,), - ) - - funky = testfunc(a=3.5, output_spec=my_output_spec) - res = funky() - assert res.output.out1 == 3.5 + funky = TestFunc(a=3.5) + outputs = funky() + assert outputs.out1 == 3.5 def test_output_spec_func_1a_except(): @@ -855,17 +753,11 @@ def test_output_spec_func_1a_except(): float returned instead of int - TypeError """ - @mark.task - def testfunc(a): + @python.define(outputs={"out1": python.out(type=int, help="output")}) + def TestFunc(a): return a - my_output_spec = SpecInfo( - 
name="Output", - fields=[("out1", attr.ib(type=int, metadata={"help_string": "output"}))], - bases=(BaseSpec,), - ) - - funky = testfunc(a=3.5, output_spec=my_output_spec) + funky = TestFunc(a=3.5) with pytest.raises(TypeError): funky() @@ -875,19 +767,13 @@ def test_output_spec_func_2(): output_spec changes the type of the output (so error is not raised) """ - @mark.task - def testfunc(a) -> int: + @python.define(outputs={"out1": python.out(type=float, help="output")}) + def TestFunc(a) -> int: return a - my_output_spec = SpecInfo( - name="Output", - fields=[("out1", attr.ib(type=float, metadata={"help_string": "output"}))], - bases=(BaseSpec,), - ) - - funky = testfunc(a=3.5, output_spec=my_output_spec) - res = funky() - assert res.output.out1 == 3.5 + funky = TestFunc(a=3.5) + outputs = funky() + assert outputs.out1 == 3.5 def test_output_spec_func_2a(): @@ -896,19 +782,13 @@ def test_output_spec_func_2a(): using a shorter syntax """ - @mark.task - def testfunc(a) -> int: + @python.define(outputs={"out1": python.out(type=float, help="output")}) + def TestFunc(a) -> int: return a - my_output_spec = SpecInfo( - name="Output", - fields=[("out1", float, {"help_string": "output"})], - bases=(BaseSpec,), - ) - - funky = testfunc(a=3.5, output_spec=my_output_spec) - res = funky() - assert res.output.out1 == 3.5 + funky = TestFunc(a=3.5) + outputs = funky() + assert outputs.out1 == 3.5 def test_output_spec_func_3(): @@ -916,24 +796,13 @@ def test_output_spec_func_3(): MultiOutputObj is used, output is a 2-el list, so converter doesn't do anything """ - @mark.task - def testfunc(a, b): + @python.define(outputs={"out_list": python.out(type=MultiOutputObj, help="output")}) + def TestFunc(a, b): return [a, b] - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "out_list", - attr.ib(type=MultiOutputObj, metadata={"help_string": "output"}), - ) - ], - bases=(BaseSpec,), - ) - - funky = testfunc(a=3.5, b=1, output_spec=my_output_spec) - res = funky() - assert res.output.out_list == [3.5, 1] + funky = TestFunc(a=3.5, b=1) + outputs = funky() + assert outputs.out_list == [3.5, 1] def test_output_spec_func_4(): @@ -941,28 +810,17 @@ def test_output_spec_func_4(): MultiOutputObj is used, output is a 1el list, so converter return the element """ - @mark.task - def testfunc(a): + @python.define(outputs={"out_list": python.out(type=MultiOutputObj, help="output")}) + def TestFunc(a): return [a] - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "out_1el", - attr.ib(type=MultiOutputObj, metadata={"help_string": "output"}), - ) - ], - bases=(BaseSpec,), - ) - - funky = testfunc(a=3.5, output_spec=my_output_spec) - res = funky() - assert res.output.out_1el == 3.5 + funky = TestFunc(a=3.5) + outputs = funky() + assert outputs.out_list == 3.5 def test_exception_func(): - @mark.task + @python.define def raise_exception(c, d): raise Exception() @@ -973,70 +831,72 @@ def raise_exception(c, d): def test_result_none_1(): """checking if None is properly returned as the result""" - @mark.task - def fun_none(x): + @python.define + def FunNone(x): return None - task = fun_none(name="none", x=3) - res = task() - assert res.output.out is None + task = FunNone(x=3) + outputs = task() + assert outputs.out is None def test_result_none_2(): """checking if None is properly set for all outputs""" - @mark.task - def fun_none(x) -> (ty.Any, ty.Any): - return None + @python.define(outputs=["out1", "out2"]) + def FunNone(x) -> tuple[ty.Any, ty.Any]: + return None # Do we actually want this behaviour? 
- task = fun_none(name="none", x=3) - res = task() - assert res.output.out1 is None - assert res.output.out2 is None + task = FunNone(x=3) + outputs = task() + assert outputs.out1 is None + assert outputs.out2 is None def test_audit_prov( tmpdir, ): - @mark.task - def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): + @python.define(outputs={"out": float}) + def TestFunc(a: int, b: float = 0.1): return a + b # printing the audit message - funky = testfunc(a=1, audit_flags=AuditFlag.PROV, messengers=PrintMessenger()) - funky.cache_dir = tmpdir - funky() + funky = TestFunc(a=1) + funky(cache_dir=tmpdir, audit_flags=AuditFlag.PROV, messengers=PrintMessenger()) # saving the audit message into the file - funky = testfunc(a=2, audit_flags=AuditFlag.PROV, messengers=FileMessenger()) - funky.cache_dir = tmpdir - funky() + funky = TestFunc(a=2) + funky(cache_dir=tmpdir, audit_flags=AuditFlag.PROV, messengers=FileMessenger()) # this should be the default loctaion - message_path = tmpdir / funky.checksum / "messages" - assert (tmpdir / funky.checksum / "messages").exists() + message_path = tmpdir / funky._checksum / "messages" + assert (tmpdir / funky._checksum / "messages").exists() - collect_messages(tmpdir / funky.checksum, message_path, ld_op="compact") - assert (tmpdir / funky.checksum / "messages.jsonld").exists() + collect_messages(tmpdir / funky._checksum, message_path, ld_op="compact") + assert (tmpdir / funky._checksum / "messages.jsonld").exists() def test_audit_task(tmpdir): - @mark.task - def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): + @python.define(outputs={"out": float}) + def TestFunc(a: int, b: float = 0.1): return a + b from glob import glob - funky = testfunc(a=2, audit_flags=AuditFlag.PROV, messengers=FileMessenger()) - funky.cache_dir = tmpdir - funky() - message_path = tmpdir / funky.checksum / "messages" + funky = TestFunc(a=2) + funky( + cache_dir=tmpdir, + audit_flags=AuditFlag.PROV, + messengers=FileMessenger(), + name="TestFunc", + ) + message_path = tmpdir / funky._checksum / "messages" for file in glob(str(message_path) + "/*.jsonld"): with open(file) as f: data = json.load(f) if "@type" in data: if "AssociatedWith" in data: - assert "testfunc" in data["Label"] + assert "main" in data["Label"] if "@type" in data: if data["@type"] == "input": @@ -1048,20 +908,19 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)] def test_audit_shellcommandtask(tmpdir): - args = "-l" - shelly = ShellCommandTask( - name="shelly", - executable="ls", - args=args, - audit_flags=AuditFlag.PROV, - messengers=FileMessenger(), - ) + Shelly = shell.define("ls -l") from glob import glob - shelly.cache_dir = tmpdir - shelly() - message_path = tmpdir / shelly.checksum / "messages" + shelly = Shelly() + + shelly( + cache_dir=tmpdir, + audit_flags=AuditFlag.PROV, + messengers=FileMessenger(), + name="shelly", + ) + message_path = tmpdir / shelly._checksum / "messages" # go through each jsonld file in message_path and check if the label field exists command_content = [] @@ -1072,7 +931,7 @@ def test_audit_shellcommandtask(tmpdir): if "@type" in data: if "AssociatedWith" in data: - assert "shelly" in data["Label"] + assert "main" == data["Label"] if "@type" in data: if data["@type"] == "input": @@ -1087,68 +946,48 @@ def test_audit_shellcommandtask(tmpdir): def test_audit_shellcommandtask_file(tmp_path): # sourcery skip: use-fstring-for-concatenation - import glob - import shutil - # create test.txt file 
with "This is a test" in it in the tmpdir # create txt file in cwd - with open("test.txt", "w") as f: + test1_file = tmp_path / "test.txt" + test2_file = tmp_path / "test2.txt" + with open(test1_file, "w") as f: f.write("This is a test") - with open("test2.txt", "w") as f: + with open(test2_file, "w") as f: f.write("This is a test") - # copy the test.txt file to the tmpdir - shutil.copy("test.txt", tmp_path) - shutil.copy("test2.txt", tmp_path) - cmd = "cat" - file_in = File(tmp_path / "test.txt") - file_in_2 = File(tmp_path / "test2.txt") + file_in = File(test1_file) + file_in_2 = File(test2_file) test_file_hash = hash_function(file_in) test_file_hash_2 = hash_function(file_in_2) - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "in_file", - attr.ib( - type=File, - metadata={ - "position": 1, - "argstr": "", - "help_string": "text", - "mandatory": True, - }, - ), + Shelly = shell.define( + cmd, + inputs={ + "in_file": shell.arg( + type=File, + position=1, + argstr="", + help="text", ), - ( - "in_file_2", - attr.ib( - type=File, - metadata={ - "position": 2, - "argstr": "", - "help_string": "text", - "mandatory": True, - }, - ), + "in_file_2": shell.arg( + type=File, + position=2, + argstr="", + help="text", ), - ], - bases=(ShellSpec,), + }, ) - shelly = ShellCommandTask( - name="shelly", + shelly = Shelly( in_file=file_in, in_file_2=file_in_2, - input_spec=my_input_spec, - executable=cmd, + ) + shelly( + cache_dir=tmp_path, audit_flags=AuditFlag.PROV, messengers=FileMessenger(), ) - shelly.cache_dir = tmp_path - results = shelly() - message_path = tmp_path / shelly.checksum / "messages" + message_path = tmp_path / shelly._hash / "messages" for file in glob.glob(str(message_path) + "/*.jsonld"): with open(file) as x: data = json.load(x) @@ -1169,20 +1008,19 @@ def test_audit_shellcommandtask_version(tmpdir): "utf-8" ) version_cmd = version_cmd.splitlines()[0] - cmd = "less" - shelly = ShellCommandTask( + cmd = "less test_task.py" + Shelly = shell.define(cmd) + shelly = Shelly() + + import glob + + shelly( + cache_dir=tmpdir, name="shelly", - executable=cmd, - args="test_task.py", audit_flags=AuditFlag.PROV, messengers=FileMessenger(), ) - - import glob - - shelly.cache_dir = tmpdir - shelly() - message_path = tmpdir / shelly.checksum / "messages" + message_path = tmpdir / shelly._checksum / "messages" # go through each jsonld file in message_path and check if the label field exists version_content = [] for file in glob.glob(str(message_path) + "/*.jsonld"): @@ -1200,27 +1038,32 @@ def test_audit_prov_messdir_1( ): """customized messenger dir""" - @mark.task - def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): + @python.define(outputs={"out": float}) + def TestFunc(a: int, b: float = 0.1): return a + b # printing the audit message - funky = testfunc(a=1, audit_flags=AuditFlag.PROV, messengers=PrintMessenger()) - funky.cache_dir = tmpdir - funky() + funky = TestFunc(a=1) + funky(cache_dir=tmpdir, audit_flags=AuditFlag.PROV, messengers=PrintMessenger()) # saving the audit message into the file - funky = testfunc(a=2, audit_flags=AuditFlag.PROV, messengers=FileMessenger()) + funky = TestFunc(a=2) # user defined path - message_path = tmpdir / funky.checksum / "my_messages" - funky.cache_dir = tmpdir + message_path = tmpdir / funky._checksum / "my_messages" # providing messenger_dir for audit - funky.audit.messenger_args = dict(message_dir=message_path) - funky() - assert (tmpdir / funky.checksum / "my_messages").exists() + funky_task = Task( + 
definition=funky, + submitter=Submitter( + cache_dir=tmpdir, audit_flags=AuditFlag.PROV, messengers=FileMessenger() + ), + name="funky", + ) + funky_task.audit.messenger_args = dict(message_dir=message_path) + funky_task.run() + assert (tmpdir / funky._checksum / "my_messages").exists() - collect_messages(tmpdir / funky.checksum, message_path, ld_op="compact") - assert (tmpdir / funky.checksum / "messages.jsonld").exists() + collect_messages(tmpdir / funky._checksum, message_path, ld_op="compact") + assert (tmpdir / funky._checksum / "messages.jsonld").exists() def test_audit_prov_messdir_2( @@ -1228,27 +1071,25 @@ def test_audit_prov_messdir_2( ): """customized messenger dir in init""" - @mark.task - def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): + @python.define(outputs={"out": float}) + def TestFunc(a: int, b: float = 0.1): return a + b # printing the audit message - funky = testfunc(a=1, audit_flags=AuditFlag.PROV, messengers=PrintMessenger()) - funky.cache_dir = tmpdir - funky() + funky = TestFunc(a=1) + funky(cache_dir=tmpdir, audit_flags=AuditFlag.PROV, messengers=PrintMessenger()) # user defined path (doesn't depend on checksum, can be defined before init) message_path = tmpdir / "my_messages" # saving the audit message into the file - funky = testfunc( - a=2, + funky = TestFunc(a=2) + # providing messenger_dir for audit + funky( + cache_dir=tmpdir, audit_flags=AuditFlag.PROV, messengers=FileMessenger(), messenger_args=dict(message_dir=message_path), ) - funky.cache_dir = tmpdir - # providing messenger_dir for audit - funky() assert (tmpdir / "my_messages").exists() collect_messages(tmpdir, message_path, ld_op="compact") @@ -1260,50 +1101,55 @@ def test_audit_prov_wf( ): """FileMessenger for wf""" - @mark.task - def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): + @python.define(outputs={"out": float}) + def TestFunc(a: int, b: float = 0.1): return a + b - wf = Workflow( + @workflow.define + def Workflow(x: int): + test_func = workflow.add(TestFunc(a=x)) + return test_func.out + + wf = Workflow(x=2) + + wf( name="wf", - input_spec=["x"], cache_dir=tmpdir, audit_flags=AuditFlag.PROV, messengers=FileMessenger(), ) - wf.add(testfunc(name="testfunc", a=wf.lzin.x)) - wf.set_output([("out", wf.testfunc.lzout.out)]) - wf.inputs.x = 2 - - wf(plugin="cf") # default path - message_path = tmpdir / wf.checksum / "messages" + message_path = tmpdir / wf._checksum / "messages" assert message_path.exists() - collect_messages(tmpdir / wf.checksum, message_path, ld_op="compact") - assert (tmpdir / wf.checksum / "messages.jsonld").exists() + collect_messages(tmpdir / wf._checksum, message_path, ld_op="compact") + assert (tmpdir / wf._checksum / "messages.jsonld").exists() def test_audit_all( tmpdir, ): - @mark.task - def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): + @python.define(outputs={"out": float}) + def TestFunc(a: int, b: float = 0.1): return a + b - funky = testfunc(a=2, audit_flags=AuditFlag.ALL, messengers=FileMessenger()) - message_path = tmpdir / funky.checksum / "messages" - funky.cache_dir = tmpdir - funky.audit.messenger_args = dict(message_dir=message_path) - funky() + funky = TestFunc(a=2) + message_path = tmpdir / funky._checksum / "messages" + + funky( + cache_dir=tmpdir, + audit_flags=AuditFlag.ALL, + messengers=FileMessenger(), + messenger_args=dict(message_dir=message_path), + ) from glob import glob - assert len(glob(str(tmpdir / funky.checksum / "proc*.log"))) == 1 + assert 
len(glob(str(tmpdir / funky._checksum / "proc*.log"))) == 1 assert len(glob(str(message_path / "*.jsonld"))) == 7 # commented out to speed up testing - collect_messages(tmpdir / funky.checksum, message_path, ld_op="compact") - assert (tmpdir / funky.checksum / "messages.jsonld").exists() + collect_messages(tmpdir / funky._checksum, message_path, ld_op="compact") + assert (tmpdir / funky._checksum / "messages.jsonld").exists() @no_win @@ -1311,42 +1157,43 @@ def test_shell_cmd(tmpdir): cmd = ["echo", "hail", "pydra"] # all args given as executable - shelly = ShellCommandTask(name="shelly", executable=cmd) + Shelly = shell.define(" ".join(cmd)) + shelly = Shelly() assert shelly.cmdline == " ".join(cmd) - res = shelly._run() - assert res.output.stdout == " ".join(cmd[1:]) + "\n" + outputs = shelly() + assert outputs.stdout == " ".join(cmd[1:]) + "\n" # separate command into exec + args - shelly = ShellCommandTask(executable=cmd[0], args=cmd[1:]) - assert shelly.inputs.executable == "echo" + Shelly = shell.define( + cmd[0], inputs=[shell.arg(name=a, default=a) for a in cmd[1:]] + ) + shelly = Shelly() + assert shelly.executable == "echo" assert shelly.cmdline == " ".join(cmd) - res = shelly._run() - assert res.output.return_code == 0 - assert res.output.stdout == " ".join(cmd[1:]) + "\n" + outputs = shelly() + assert outputs.return_code == 0 + assert outputs.stdout == " ".join(cmd[1:]) + "\n" def test_functask_callable(tmpdir): # no submitter or plugin - foo = funaddtwo(a=1) - res = foo() - assert res.output.out == 3 - assert foo.plugin is None + foo = FunAddTwo(a=1) + outputs = foo() + assert outputs.out == 3 # plugin - bar = funaddtwo(a=2) - res = bar(plugin="cf") - assert res.output.out == 4 - assert bar.plugin is None + bar = FunAddTwo(a=2) + outputs = bar(worker="cf", cache_dir=tmpdir) + assert outputs.out == 4 - foo2 = funaddtwo(a=3) - foo2.plugin = "cf" - res = foo2() - assert res.output.out == 5 - assert foo2.plugin == "cf" +def test_taskhooks_1(tmpdir: Path, capsys): + cache_dir = tmpdir / "cache" + cache_dir.mkdir() -def test_taskhooks_1(tmpdir, capsys): - foo = funaddtwo(name="foo", a=1, cache_dir=tmpdir) + foo = Task( + definition=FunAddTwo(a=1), submitter=Submitter(cache_dir=tmpdir), name="foo" + ) assert foo.hooks # ensure all hooks are defined for attr in ("pre_run", "post_run", "pre_run_task", "post_run_task"): @@ -1356,8 +1203,7 @@ def test_taskhooks_1(tmpdir, capsys): def myhook(task, *args): print("I was called") - foo.hooks.pre_run = myhook - foo() + FunAddTwo(a=1)(cache_dir=cache_dir, hooks=TaskHooks(pre_run=myhook)) captured = capsys.readouterr() assert "I was called\n" in captured.out del captured @@ -1366,42 +1212,33 @@ def myhook(task, *args): with pytest.raises(AttributeError): foo.hooks.mid_run = myhook - # set all hooks - foo.hooks.post_run = myhook - foo.hooks.pre_run_task = myhook - foo.hooks.post_run_task = myhook - foo.inputs.a = 2 # ensure not pre-cached - foo() - captured = capsys.readouterr() - assert captured.out.count("I was called\n") == 4 - del captured - - # hooks are independent across tasks by default - bar = funaddtwo(name="bar", a=3, cache_dir=tmpdir) - assert bar.hooks is not foo.hooks - # but can be shared across tasks - bar.hooks = foo.hooks - # and workflows - wf = gen_basic_wf() - wf.tmpdir = tmpdir - wf.hooks = bar.hooks - assert foo.hooks == bar.hooks == wf.hooks - - wf(plugin="cf") - captured = capsys.readouterr() - assert captured.out.count("I was called\n") == 4 - del captured - # reset all hooks foo.hooks.reset() for attr in ("pre_run", 
"post_run", "pre_run_task", "post_run_task"): hook = getattr(foo.hooks, attr) assert hook() is None + # clear cache + shutil.rmtree(cache_dir) + cache_dir.mkdir() + + # set all hooks + FunAddTwo(a=1)( + cache_dir=cache_dir, + hooks=TaskHooks( + pre_run=myhook, + post_run=myhook, + pre_run_task=myhook, + post_run_task=myhook, + ), + ) + captured = capsys.readouterr() + assert captured.out.count("I was called\n") == 4 + del captured + def test_taskhooks_2(tmpdir, capsys): """checking order of the hooks; using task's attributes""" - foo = funaddtwo(name="foo", a=1, cache_dir=tmpdir) def myhook_prerun(task, *args): print(f"i. prerun hook was called from {task.name}") @@ -1415,11 +1252,15 @@ def myhook_postrun_task(task, *args): def myhook_postrun(task, *args): print(f"iv. postrun hook was called {task.name}") - foo.hooks.pre_run = myhook_prerun - foo.hooks.post_run = myhook_postrun - foo.hooks.pre_run_task = myhook_prerun_task - foo.hooks.post_run_task = myhook_postrun_task - foo() + FunAddTwo(a=1)( + cache_dir=tmpdir, + hooks=TaskHooks( + pre_run=myhook_prerun, + post_run=myhook_postrun, + pre_run_task=myhook_prerun_task, + post_run_task=myhook_postrun_task, + ), + ) captured = capsys.readouterr() hook_messages = captured.out.strip().split("\n") @@ -1432,17 +1273,19 @@ def myhook_postrun(task, *args): def test_taskhooks_3(tmpdir, capsys): """checking results in the post run hooks""" - foo = funaddtwo(name="foo", a=1, cache_dir=tmpdir) + foo = Task( + definition=FunAddTwo(a=1), name="foo", submitter=Submitter(cache_dir=tmpdir) + ) def myhook_postrun_task(task, result, *args): - print(f"postrun task hook, the result is {result.output.out}") + print(f"postrun task hook, the result is {result.outputs.out}") def myhook_postrun(task, result, *args): - print(f"postrun hook, the result is {result.output.out}") + print(f"postrun hook, the result is {result.outputs.out}") foo.hooks.post_run = myhook_postrun foo.hooks.post_run_task = myhook_postrun_task - foo() + foo.run() captured = capsys.readouterr() hook_messages = captured.out.strip().split("\n") @@ -1453,7 +1296,6 @@ def myhook_postrun(task, result, *args): def test_taskhooks_4(tmpdir, capsys): """task raises an error: postrun task should be called, postrun shouldn't be called""" - foo = funaddtwo(name="foo", a="one", cache_dir=tmpdir) def myhook_postrun_task(task, result, *args): print(f"postrun task hook was called, result object is {result}") @@ -1461,11 +1303,11 @@ def myhook_postrun_task(task, result, *args): def myhook_postrun(task, result, *args): print("postrun hook should not be called") - foo.hooks.post_run = myhook_postrun - foo.hooks.post_run_task = myhook_postrun_task - with pytest.raises(Exception): - foo() + FunAddTwo(a="one")( + cache_dir=tmpdir, + hooks=TaskHooks(post_run=myhook_postrun, post_run_task=myhook_postrun_task), + ) captured = capsys.readouterr() hook_messages = captured.out.strip().split("\n") @@ -1480,17 +1322,18 @@ def test_traceback(tmpdir): full traceback including the line in the python function """ - @mark.task - def fun_error(x): + @python.define + def FunError(x): raise Exception("Error from the function") - task = fun_error(name="error", cache_dir=tmpdir).split("x", x=[3, 4]) - - with pytest.raises(Exception, match="from the function") as exinfo: - task() + with pytest.raises(Exception, match="Error from the function") as exinfo: + with Submitter(worker="cf", cache_dir=tmpdir) as sub: + sub(FunError(x=3), raise_errors=True) # getting error file from the error message - error_file_match = 
str(exinfo.value).split("here: ")[-1].split("_error.pklz")[0] + error_file_match = ( + str(exinfo.value.__notes__[0]).split("here: ")[-1].split("_error.pklz")[0] + ) error_file = Path(error_file_match) / "_error.pklz" # checking if the file exists assert error_file.exists() @@ -1498,7 +1341,7 @@ def fun_error(x): error_tb = cp.loads(error_file.read_bytes())["error message"] # the error traceback should be a list and should point to a specific line in the function assert isinstance(error_tb, list) - assert "in fun_error" in error_tb[-2] + assert "in FunError" in error_tb[-2] def test_traceback_wf(tmpdir): @@ -1507,19 +1350,24 @@ def test_traceback_wf(tmpdir): full traceback including the line in the python function """ - @mark.task - def fun_error(x): + @python.define + def FunError(x): raise Exception("Error from the function") - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir).split("x", x=[3, 4]) - wf.add(fun_error(name="error", x=wf.lzin.x)) - wf.set_output([("out", wf.error.lzout.out)]) + @workflow.define + def Workflow(x_list): + fun_error = workflow.add(FunError().split(x=x_list), name="fun_error") + return fun_error.out - with pytest.raises(Exception, match="Task error raised an error") as exinfo: - wf() + wf = Workflow(x_list=[3, 4]) + with pytest.raises(Exception, match="Task 'fun_error' raised an error.*") as exinfo: + with Submitter(worker="cf", cache_dir=tmpdir) as sub: + sub(wf, raise_errors=True) # getting error file from the error message - error_file_match = str(exinfo.value).split("here: ")[-1].split("_error.pklz")[0] + error_file_match = ( + str(exinfo.value).split("here: ")[-1].split("_error.pklz")[0].strip() + ) error_file = Path(error_file_match) / "_error.pklz" # checking if the file exists assert error_file.exists() @@ -1527,28 +1375,28 @@ def fun_error(x): error_tb = cp.loads(error_file.read_bytes())["error message"] # the error traceback should be a list and should point to a specific line in the function assert isinstance(error_tb, list) - assert "in fun_error" in error_tb[-2] + assert "in FunError" in error_tb[-2] def test_rerun_errored(tmpdir, capfd): """Test rerunning a task containing errors. 
Only the errored tasks should be rerun""" - @mark.task - def pass_odds(x): + @python.define + def PassOdds(x): if x % 2 == 0: - print(f"x%2 = {x % 2} (error)\n") + print(f"x={x} -> x%2 = {bool(x % 2)} (error)\n") raise Exception("even error") else: - print(f"x%2 = {x % 2}\n") + print(f"x={x} -> x%2 = {bool(x % 2)}\n") return x - task = pass_odds(name="pass_odds", cache_dir=tmpdir).split("x", x=[1, 2, 3, 4, 5]) + pass_odds = PassOdds().split("x", x=[1, 2, 3, 4, 5]) - with pytest.raises(Exception, match="even error"): - task() - with pytest.raises(Exception, match="even error"): - task() + with pytest.raises(Exception): + pass_odds(cache_dir=tmpdir, worker="cf") + with pytest.raises(Exception): + pass_odds(cache_dir=tmpdir, worker="cf") out, err = capfd.readouterr() stdout_lines = out.splitlines() @@ -1568,7 +1416,7 @@ def pass_odds(x): assert errors_found == 4 -@attr.s(auto_attribs=True) +@attrs.define(auto_attribs=True) class A: x: int @@ -1576,9 +1424,31 @@ class A: def test_object_input(): """Test function tasks with object inputs""" - @mark.task - def testfunc(a: A): + @python.define + def TestFunc(a: A): return a.x - result = testfunc(a=A(x=7))() - assert result.output.out == 7 + outputs = TestFunc(a=A(x=7))() + assert outputs.out == 7 + + +def test_argstr_formatting(): + @shell.define + class Defn(ShellDef["Defn.Outputs"]): + a1_field: str + b2_field: float + c3_field: ty.Dict[str, str] + d4_field: ty.List[str] = shell.arg(sep=" ") + executable = "dummy" + + class Outputs(ShellOutputs): + pass + + values = dict(a1_field="1", b2_field=2.0, c3_field={"c": "3"}, d4_field=["4"]) + assert ( + argstr_formatting( + "{a1_field} {b2_field:02f} -test {c3_field[c]} -me {d4_field[0]}", + values, + ) + == "1 2.000000 -test 3 -me 4" + ) diff --git a/pydra/engine/tests/test_tasks_files.py b/pydra/engine/tests/test_tasks_files.py index a1849e221b..96a4f940a9 100644 --- a/pydra/engine/tests/test_tasks_files.py +++ b/pydra/engine/tests/test_tasks_files.py @@ -5,23 +5,22 @@ import typing as ty from ..submitter import Submitter -from ..core import Workflow -from ... 
import mark -from ..specs import File, Directory +from pydra.design import python, workflow +from fileformats.generic import File, Directory -@mark.task -def dir_count_file(dirpath): +@python.define +def DirCountFile(dirpath: Directory) -> int: return len(os.listdir(dirpath)) -@mark.task -def dir_count_file_annot(dirpath: Directory): +@python.define +def DirCountFileAnnot(dirpath: Directory) -> int: return len(os.listdir(dirpath)) -@mark.task -def file_add2(file): +@python.define +def FileAdd2(file: File) -> File: array_inp = np.load(file) array_out = array_inp + 2 cwd = os.getcwd() @@ -31,8 +30,8 @@ def file_add2(file): return file_out -@mark.task -def file_mult(file): +@python.define +def FileMult(file: File) -> File: array_inp = np.load(file) array_out = 10 * array_inp cwd = os.getcwd() @@ -41,8 +40,8 @@ def file_mult(file): return file_out -@mark.task -def file_add2_annot(file: File) -> ty.NamedTuple("Output", [("out", File)]): +@python.define +def FileAdd2Annot(file: File) -> File: array_inp = np.load(file) array_out = array_inp + 2 cwd = os.getcwd() @@ -52,8 +51,8 @@ def file_add2_annot(file: File) -> ty.NamedTuple("Output", [("out", File)]): return file_out -@mark.task -def file_mult_annot(file: File) -> ty.NamedTuple("Output", [("out", File)]): +@python.define +def FileMultAnnot(file: File) -> File: array_inp = np.load(file) array_out = 10 * array_inp cwd = os.getcwd() @@ -69,36 +68,38 @@ def test_task_1(tmpdir): # creating abs path file = os.path.join(os.getcwd(), "arr1.npy") np.save(file, arr) - nn = file_add2(name="add2", file=file) + nn = FileAdd2(file=file) - with Submitter(plugin="cf") as sub: - sub(nn) + with Submitter(worker="cf") as sub: + res = sub(nn) # checking the results - results = nn.result() - res = np.load(results.output.out) - assert res == np.array([4]) + + result = np.load(res.outputs.out) + assert result == np.array([4]) def test_wf_1(tmpdir): """workflow with 2 tasks that take file as an input and give file as an aoutput""" - wf = Workflow(name="wf_1", input_spec=["file_orig"]) - wf.add(file_add2(name="add2", file=wf.lzin.file_orig)) - wf.add(file_mult(name="mult", file=wf.add2.lzout.out)) - wf.set_output([("out", wf.mult.lzout.out)]) + + @workflow.define + def Workflow(file_orig: File): + add2 = workflow.add(FileAdd2(file=file_orig)) + mult = workflow.add(FileMult(file=add2.out)) + return mult.out os.chdir(tmpdir) arr = np.array([2, 3]) # creating abs path file_orig = os.path.join(os.getcwd(), "arr_orig.npy") np.save(file_orig, arr) - wf.inputs.file_orig = file_orig + wf = Workflow(file_orig=file_orig) - with Submitter(plugin="cf") as sub: - sub(wf) + with Submitter(worker="cf") as sub: + res = sub(wf) - assert wf.output_dir.exists() - file_output = wf.result().output.out + assert res.output_dir.exists() + file_output = res.outputs.out assert Path(file_output).exists() # loading results array_out = np.load(file_output) @@ -112,15 +113,15 @@ def test_file_annotation_1(tmpdir): # creating abs path file = os.path.join(os.getcwd(), "arr1.npy") np.save(file, arr) - nn = file_add2_annot(name="add2", file=file) + nn = FileAdd2Annot(file=file) - with Submitter(plugin="cf") as sub: - sub(nn) + with Submitter(worker="cf") as sub: + res = sub(nn) # checking the results - results = nn.result() - res = np.load(results.output.out) - assert res == np.array([4]) + assert res.errored is False, " ".join(res.errors["error message"]) + arr = np.load(res.outputs.out) + assert arr == np.array([4]) def test_broken_file(tmpdir): @@ -128,13 +129,12 @@ def test_broken_file(tmpdir): 
os.chdir(tmpdir) file = os.path.join(os.getcwd(), "non_existent.npy") - nn = file_add2(name="add2", file=file) with pytest.raises(FileNotFoundError): - with Submitter(plugin="cf") as sub: - sub(nn) + with Submitter(worker="cf") as sub: + sub(FileAdd2(file=file)) with pytest.raises(FileNotFoundError, match="do not exist"): - file_add2_annot(name="add2_annot", file=file) + FileAdd2Annot(file=file) def test_broken_file_link(tmpdir): @@ -150,31 +150,27 @@ def test_broken_file_link(tmpdir): os.symlink(file, file_link) os.remove(file) - nn = file_add2(name="add2", file=file_link) # raises error inside task # unless variable is defined as a File pydra will treat it as a string with pytest.raises(FileNotFoundError): - with Submitter(plugin="cf") as sub: - sub(nn) + with Submitter(worker="cf") as sub: + sub(FileAdd2(file=file_link)) with pytest.raises(FileNotFoundError, match="do not exist"): - file_add2_annot(name="add2_annot", file=file_link) + FileAdd2Annot(file=file_link) def test_broken_dir(): """Test how broken directories are handled during hashing""" - # dirpath doesn't exist - nn = dir_count_file(name="listdir", dirpath="/broken_dir_path/") - # raises error inside task # unless variable is defined as a File pydra will treat it as a string with pytest.raises(FileNotFoundError): - with Submitter(plugin="cf") as sub: - sub(nn) + with Submitter(worker="cf") as sub: + sub(DirCountFile(dirpath="/broken_dir_path/")) # raises error before task is run with pytest.raises(FileNotFoundError): - dir_count_file_annot(name="listdir", dirpath="/broken_dir_path/") + DirCountFileAnnot(dirpath="/broken_dir_path/") def test_broken_dir_link1(tmpdir): @@ -188,34 +184,10 @@ def test_broken_dir_link1(tmpdir): os.symlink(dir1, dir1_link) os.rmdir(dir1) - nn = dir_count_file(name="listdir", dirpath=Path(dir1)) # raises error while running task with pytest.raises(FileNotFoundError): - with Submitter(plugin="cf") as sub: - sub(nn) + with Submitter(worker="cf") as sub: + sub(DirCountFile(dirpath=Path(dir1))) with pytest.raises(FileNotFoundError): - dir_count_file_annot(name="listdir", dirpath=Path(dir1)) - - -def test_broken_dir_link2(tmpdir): - # valid dirs with broken symlink(s) are hashed - dir2 = tmpdir.join("dir2") - os.mkdir(dir2) - file1 = dir2.join("file1") - file2 = dir2.join("file2") - file1.open("w+").close() - file2.open("w+").close() - - file1_link = dir2.join("file1_link") - os.symlink(file1, file1_link) - os.remove(file1) # file1_link is broken - - nn = dir_count_file(name="listdir", dirpath=dir2) - # does not raises error because pydra treats dirpath as a string - with Submitter(plugin="cf") as sub: - sub(nn) - - nn2 = dir_count_file_annot(name="listdir", dirpath=str(dir2)) - with Submitter(plugin="cf") as sub: - sub(nn2) + DirCountFileAnnot(dirpath=Path(dir1)) diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index c6aab6544f..eb4c8e5025 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -1,865 +1,682 @@ import pytest -import shutil, os, sys +import shutil +import os +import sys import time import typing as ty import attr from pathlib import Path from .utils import ( - add2, - add2_wait, - multiply, - multiply_list, - multiply_mixed, - power, - ten, - identity, - identity_2flds, - list_output, - fun_addsubvar, - fun_addvar3, - fun_addvar, - fun_addtwo, - add2_sub2_res, - add2_sub2_res_list, - fun_addvar_none, - fun_addvar_default, - fun_addvar_default_notype, - fun_addvar_notype, - fun_addtwo_notype, - fun_write_file, - 
fun_write_file_list, - fun_write_file_list2dict, - list_sum, - list_mult_sum, + Add2, + Add2Wait, + Multiply, + # MultiplyList, + # MultiplyMixed, + Power, + Ten, + Identity, + Identity2Flds, + ListOutput, + FunAddSubVar, + FunAddVar3, + FunAddVar, + FunAddTwo, + Add2Sub2Res, + Add2Sub2ResList, + FunAddVarNone, + FunAddVarDefault, + FunAddVarDefaultNoType, + FunAddVarNoType, + FunAddTwoNoType, + FunWriteFile, + FunWriteFileList, + FunWriteFileList2Dict, + ListSum, + ListMultSum, DOT_FLAG, ) -from ..submitter import Submitter -from ..core import Workflow -from ... import mark -from ..specs import SpecInfo, BaseSpec, ShellSpec +from pydra.engine.submitter import Submitter +from pydra.design import python, workflow +import pydra.engine.core +from pydra.engine.core import Workflow +from pydra.engine.helpers import plot_workflow from pydra.utils import exc_info_matches -def test_wf_no_input_spec(): - with pytest.raises(ValueError, match='Empty "Inputs" spec'): - Workflow(name="workflow") - - -def test_wf_specinfo_input_spec(): - input_spec = SpecInfo( - name="Input", - fields=[ - ("a", str, "", {"mandatory": True}), - ("b", dict, {"foo": 1, "bar": False}, {"mandatory": False}), - ], - bases=(BaseSpec,), - ) - wf = Workflow( - name="workflow", - input_spec=input_spec, - ) - for x in ["a", "b", "_graph_checksums"]: - assert hasattr(wf.inputs, x) - assert wf.inputs.a == "" - assert wf.inputs.b == {"foo": 1, "bar": False} - bad_input_spec = SpecInfo( - name="Input", - fields=[ - ("a", str, {"mandatory": True}), - ], - bases=(ShellSpec,), - ) - with pytest.raises( - ValueError, match="Provided SpecInfo must have BaseSpec as its base." - ): - Workflow(name="workflow", input_spec=bad_input_spec) - - -def test_wf_dict_input_and_output_spec(): - spec = { - "a": str, - "b": ty.Dict[str, ty.Union[int, bool]], - } - wf = Workflow( - name="workflow", - input_spec=spec, - output_spec=spec, - ) - wf.add( - identity_2flds( - name="identity", - x1=wf.lzin.a, - x2=wf.lzin.b, - ) - ) - wf.set_output( - [ - ("a", wf.identity.lzout.out1), - ("b", wf.identity.lzout.out2), - ] - ) - for x in ["a", "b", "_graph_checksums"]: - assert hasattr(wf.inputs, x) - wf.inputs.a = "any-string" - wf.inputs.b = {"foo": 1, "bar": False} - - with pytest.raises(TypeError) as exc_info: - wf.inputs.a = 1.0 - assert exc_info_matches(exc_info, "Cannot coerce 1.0 into ") - - with pytest.raises(TypeError) as exc_info: - wf.inputs.b = {"foo": 1, "bar": "bad-value"} - assert exc_info_matches( - exc_info, "Could not coerce object, 'bad-value', to any of the union types" - ) - - result = wf() - assert result.output.a == "any-string" - assert result.output.b == {"foo": 1, "bar": False} - +def test_wf_no_output(plugin, tmp_path): + """Raise error when output isn't set with set_output""" -def test_wf_name_conflict1(): - """raise error when workflow name conflicts with a class attribute or method""" - with pytest.raises(ValueError) as excinfo1: - Workflow(name="result", input_spec=["x"]) - assert "Cannot use names of attributes or methods" in str(excinfo1.value) - with pytest.raises(ValueError) as excinfo2: - Workflow(name="done", input_spec=["x"]) - assert "Cannot use names of attributes or methods" in str(excinfo2.value) + @workflow.define + def Worky(x): + workflow.add(Add2(x=x)) + worky = Worky(x=2) -def test_wf_name_conflict2(): - """raise error when a task with the same name is already added to workflow""" - wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(add2(name="task_name", x=wf.lzin.x)) with pytest.raises(ValueError) as excinfo: - 
wf.add(identity(name="task_name", x=3)) - assert "Another task named task_name is already added" in str(excinfo.value) + worky(worker=plugin, cache_dir=tmp_path) + assert "Worky output cannot be None" in str(excinfo.value) -def test_wf_no_output(plugin, tmpdir): - """Raise error when output isn't set with set_output""" - wf = Workflow(name="wf_1", input_spec=["x"], cache_dir=tmpdir) - wf.add(add2(name="add2", x=wf.lzin.x)) - wf.inputs.x = 2 +def test_wf_1(plugin, tmp_path): + """workflow with one task and no splitter""" - with pytest.raises(ValueError) as excinfo: - with Submitter(plugin=plugin) as sub: - sub(wf) - assert "Workflow output cannot be None" in str(excinfo.value) + @workflow.define + def Worky(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out + worky = Worky(x=2) -def test_wf_1(plugin, tmpdir): - """workflow with one task and no splitter""" - wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.cache_dir = tmpdir + checksum_before = worky._hash + outputs = worky(worker=plugin, cache_dir=tmp_path) - checksum_before = wf.checksum - with Submitter(plugin=plugin) as sub: - sub(wf) + Workflow.construct(worky) + assert worky._hash == checksum_before - assert wf.checksum == checksum_before - results = wf.result() - assert 4 == results.output.out - assert wf.output_dir.exists() + assert 4 == outputs.out -def test_wf_1a_outpastuple(plugin, tmpdir): +def test_wf_1a_outpastuple(plugin, tmp_path): """workflow with one task and no splitter set_output takes a tuple """ - wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) - wf.set_output(("out", wf.add2.lzout.out)) - wf.inputs.x = 2 - wf.plugin = plugin - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out - results = wf.result() - assert 4 == results.output.out - assert wf.output_dir.exists() + worky = Worky(x=2) + outputs = worky(worker=plugin, cache_dir=tmp_path) -def test_wf_1_call_subm(plugin, tmpdir): - """using wf.__call_ with submitter""" - wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.cache_dir = tmpdir + assert 4 == outputs.out - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) - results = wf.result() - assert 4 == results.output.out - assert wf.output_dir.exists() +def test_wf_1_call_subm(plugin, tmp_path): + """using wf["__call_"] with submitter""" + @workflow.define + def Worky(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out -def test_wf_1_call_plug(plugin, tmpdir): - """using wf.__call_ with plugin""" - wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.plugin = plugin - wf.cache_dir = tmpdir + worky = Worky(x=2) - wf(plugin=plugin) + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() - assert 4 == results.output.out - assert wf.output_dir.exists() + assert 4 == outputs.out -def test_wf_1_call_noplug_nosubm(plugin, tmpdir): - """using wf.__call_ without plugin or submitter""" - wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.cache_dir = tmpdir +def test_wf_1_call_plug(plugin, tmp_path): + 
"""using wf["__call_"] with plugin""" - wf() - results = wf.result() - assert 4 == results.output.out - assert wf.output_dir.exists() + @workflow.define + def Worky(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out + worky = Worky(x=2) -def test_wf_1_call_exception(plugin, tmpdir): - """using wf.__call_ with plugin and submitter - should raise an exception""" - wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.plugin = plugin - wf.cache_dir = tmpdir + outputs = worky(plugin=plugin) - with Submitter(plugin=plugin) as sub: - with pytest.raises(Exception) as e: - wf(submitter=sub, plugin=plugin) - assert "Specify submitter OR plugin" in str(e.value) + assert 4 == outputs.out -def test_wf_1_inp_in_call(tmpdir): - """Defining input in __call__""" - wf = Workflow(name="wf_1", input_spec=["x"], cache_dir=tmpdir) - wf.add(add2(name="add2", x=wf.lzin.x)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 1 - results = wf(x=2) - assert 4 == results.output.out +def test_wf_1_call_noplug_nosubm(plugin, tmp_path): + """using wf["__call_"] without plugin or submitter""" + + @workflow.define + def Worky(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out + + worky = Worky(x=2) + + outputs = worky() + + assert 4 == outputs.out -def test_wf_1_upd_in_run(tmpdir): +def test_wf_1_upd_in_run(tmp_path, plugin): """Updating input in __call__""" - wf = Workflow(name="wf_1", input_spec=["x"], cache_dir=tmpdir) - wf.add(add2(name="add2", x=wf.lzin.x)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 1 - results = wf(x=2) - assert 4 == results.output.out + @workflow.define + def Worky(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out -def test_wf_2(plugin, tmpdir): + worky = Worky(x=1) + worky.x = 2 + outputs = worky(cache_dir=tmp_path, plugin=plugin) + assert 4 == outputs.out + + +def test_wf_2(plugin, tmp_path): """workflow with 2 tasks, no splitter""" - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.inputs.y = 3 - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") + return add2.out + + worky = Worky(x=2, y=3) - assert wf.output_dir.exists() - results = wf.result() - assert 8 == results.output.out + outputs = worky(worker=plugin, cache_dir=tmp_path) + assert 8 == outputs.out -def test_wf_2a(plugin, tmpdir): + +def test_wf_2a(plugin, tmp_path): """workflow with 2 tasks, no splitter creating add2_task first (before calling add method), """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - add2_task = add2(name="add2") - add2_task.inputs.x = wf.mult.lzout.out - wf.add(add2_task) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.inputs.y = 3 - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") + return add2.out + + worky = Worky(x=2, y=3) + + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() - assert 8 == results.output.out - assert 
wf.output_dir.exists() + assert 8 == outputs.out -def test_wf_2b(plugin, tmpdir): +def test_wf_2b(plugin, tmp_path): """workflow with 2 tasks, no splitter creating add2_task first (before calling add method), adding inputs.x after add method """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - add2_task = add2(name="add2") - wf.add(add2_task) - add2_task.inputs.x = wf.mult.lzout.out - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.inputs.y = 3 - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") + return add2.out - results = wf.result() - assert 8 == results.output.out + worky = Worky(x=2, y=3) - assert wf.output_dir.exists() + outputs = worky(worker=plugin, cache_dir=tmp_path) + assert 8 == outputs.out -def test_wf_2c_multoutp(plugin, tmpdir): + +def test_wf_2c_multoutp(plugin, tmp_path): """workflow with 2 tasks, no splitter setting multiple outputs for the workflow """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - add2_task = add2(name="add2") - add2_task.inputs.x = wf.mult.lzout.out - wf.add(add2_task) - # setting multiple output (from both nodes) - wf.set_output([("out_add2", wf.add2.lzout.out), ("out_mult", wf.mult.lzout.out)]) - wf.inputs.x = 2 - wf.inputs.y = 3 - wf.cache_dir = tmpdir - - with Submitter(plugin=plugin) as sub: - sub(wf) - - results = wf.result() + + @workflow.define(outputs=["out_add2", "out_mult"]) + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") + return add2.out, mult.out + + worky = Worky(x=2, y=3) + + outputs = worky(worker=plugin, cache_dir=tmp_path) + # checking outputs from both nodes - assert 6 == results.output.out_mult - assert 8 == results.output.out_add2 - assert wf.output_dir.exists() + assert 6 == outputs.out_mult + assert 8 == outputs.out_add2 -def test_wf_2d_outpasdict(plugin, tmpdir): +def test_wf_2d_outpasdict(plugin, tmp_path): """workflow with 2 tasks, no splitter setting multiple outputs using a dictionary """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - add2_task = add2(name="add2") - add2_task.inputs.x = wf.mult.lzout.out - wf.add(add2_task) - # setting multiple output (from both nodes) - wf.set_output({"out_add2": wf.add2.lzout.out, "out_mult": wf.mult.lzout.out}) - wf.inputs.x = 2 - wf.inputs.y = 3 - wf.cache_dir = tmpdir - - with Submitter(plugin=plugin) as sub: - sub(wf) - - results = wf.result() + + @workflow.define(outputs=["out_add2", "out_mult"]) + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") + return add2.out, mult.out + + worky = Worky(x=2, y=3) + + outputs = worky(worker=plugin, cache_dir=tmp_path) + # checking outputs from both nodes - assert 6 == results.output.out_mult - assert 8 == results.output.out_add2 - assert wf.output_dir.exists() + assert 6 == outputs.out_mult + assert 8 == outputs.out_add2 @pytest.mark.flaky(reruns=3) # when dask -def test_wf_3(plugin_dask_opt, tmpdir): +def test_wf_3(plugin_dask_opt, tmp_path): """testing None value for an input""" - wf = Workflow(name="wf_3", input_spec=["x", "y"]) - wf.add(fun_addvar_none(name="addvar", a=wf.lzin.x, b=wf.lzin.y)) - 
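# --- Illustrative sketch (not part of the patch): the basic pattern these tests migrate to.
# A function task is defined with @python.define and a workflow with @workflow.define;
# workflow.add() returns a node whose lazy outputs (e.g. .out) feed later nodes or the
# workflow's return value. Add2/Multiply below are hypothetical minimal stand-ins, assumed
# to behave like the helper tasks imported at the top of this module.
from pydra.design import python, workflow


@python.define
def Add2(x: int) -> int:
    return x + 2


@python.define
def Multiply(x: float, y: float) -> float:
    return x * y


@workflow.define
def AddAfterMult(x, y):
    mult = workflow.add(Multiply(x=x, y=y), name="mult")
    add2 = workflow.add(Add2(x=mult.out), name="add2")
    return add2.out


# define -> instantiate with inputs -> call to execute; worker= and cache_dir=
# are optional keyword arguments, as used throughout the tests above.
outputs = AddAfterMult(x=2, y=3)()
assert outputs.out == 8
# --- end sketch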
wf.add(add2(name="add2", x=wf.addvar.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.inputs.y = None - wf.cache_dir = tmpdir - with Submitter(plugin=plugin_dask_opt) as sub: - sub(wf) + @workflow.define + def Worky(x, y): + addvar = workflow.add(FunAddVarNone(a=x, b=y)) + add2 = workflow.add(Add2(x=addvar.out), name="add2") + return add2.out + + worky = Worky(x=2, y=None) - assert wf.output_dir.exists() - results = wf.result() - assert 4 == results.output.out + outputs = worky(worker=plugin_dask_opt, cache_dir=tmp_path) + + assert 4 == outputs.out @pytest.mark.xfail(reason="the task error doesn't propagate") -def test_wf_3a_exception(plugin, tmpdir): - """testinh wf without set input, attr.NOTHING should be set +def test_wf_3a_exception(plugin, tmp_path): + """testinh worky without set input, attr.NOTHING should be set and the function should raise an exception """ - wf = Workflow(name="wf_3", input_spec=["x", "y"]) - wf.add(fun_addvar_none(name="addvar", a=wf.lzin.x, b=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.addvar.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.inputs.y = attr.NOTHING - wf.plugin = plugin - wf.cache_dir = tmpdir - with pytest.raises(TypeError) as excinfo: - with Submitter(plugin=plugin) as sub: - sub(wf) - assert "unsupported" in str(excinfo.value) + @workflow.define + def Worky(x, y): + addvar = workflow.add(FunAddVarNone(a=x, b=y)) + add2 = workflow.add(Add2(x=addvar.out), name="add2") + return add2.out + + worky = Worky(x=2, y=attr.NOTHING) + + with pytest.raises(TypeError, match="unsupported"): + worky(worker=plugin, cache_dir=tmp_path) + + +def test_wf_4(plugin, tmp_path): + """worky with a task that doesn't set one input and use the function default value""" + @workflow.define + def Worky(x, y=None): + addvar = workflow.add(FunAddVarDefault(a=x)) + add2 = workflow.add(Add2(x=addvar.out), name="add2") + return add2.out -def test_wf_4(plugin, tmpdir): - """wf with a task that doesn't set one input and use the function default value""" - wf = Workflow(name="wf_4", input_spec=["x", "y"]) - wf.add(fun_addvar_default(name="addvar", a=wf.lzin.x)) - wf.add(add2(name="add2", x=wf.addvar.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.cache_dir = tmpdir + worky = Worky(x=2) - with Submitter(plugin=plugin) as sub: - sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert wf.output_dir.exists() - results = wf.result() - assert 5 == results.output.out + assert 5 == outputs.out -def test_wf_4a(plugin, tmpdir): - """wf with a task that doesn't set one input, +def test_wf_4a(plugin, tmp_path): + """worky with a task that doesn't set one input, the unset input is send to the task input, so the task should use the function default value """ - wf = Workflow(name="wf_4a", input_spec=["x", "y"]) - wf.add(fun_addvar_default(name="addvar", a=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.addvar.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x): + addvar = workflow.add(FunAddVarDefault(a=x)) + add2 = workflow.add(Add2(x=addvar.out), name="add2") + return add2.out - assert wf.output_dir.exists() - results = wf.result() - assert 5 == results.output.out + worky = Worky(x=2) + outputs = worky(worker=plugin, cache_dir=tmp_path) -def test_wf_5(plugin, tmpdir): - """wf with two outputs connected to the task outputs + assert 5 == 
outputs.out + + +def test_wf_5(plugin, tmp_path): + """worky with two outputs connected to the task outputs one set_output """ - wf = Workflow(name="wf_5", input_spec=["x", "y"], x=3, y=2) - wf.add(fun_addsubvar(name="addsub", a=wf.lzin.x, b=wf.lzin.y)) - wf.set_output([("out_sum", wf.addsub.lzout.sum), ("out_sub", wf.addsub.lzout.sub)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define(outputs=["out_sum", "out_sub"]) + def Worky(x, y): + addsub = workflow.add(FunAddSubVar(a=x, b=y)) + return addsub.sum, addsub.sub - results = wf.result() - assert 5 == results.output.out_sum - assert 1 == results.output.out_sub + worky = Worky(x=3, y=2) + outputs = worky(worker=plugin, cache_dir=tmp_path) -def test_wf_5a(plugin, tmpdir): - """wf with two outputs connected to the task outputs, - set_output set twice - """ - wf = Workflow(name="wf_5", input_spec=["x", "y"], x=3, y=2) - wf.add(fun_addsubvar(name="addsub", a=wf.lzin.x, b=wf.lzin.y)) - wf.set_output([("out_sum", wf.addsub.lzout.sum)]) - wf.set_output([("out_sub", wf.addsub.lzout.sub)]) - wf.cache_dir = tmpdir + assert 5 == outputs.out_sum + assert 1 == outputs.out_sub - with Submitter(plugin=plugin) as sub: - sub(wf) - results = wf.result() - assert 5 == results.output.out_sum - assert 1 == results.output.out_sub +def test_wf_5a(plugin, tmp_path): + """worky with two outputs connected to the task outputs, + set_output set twice + """ + @workflow.define(outputs=["out_sum", "out_sub"]) + def Worky(x, y): + addsub = workflow.add(FunAddSubVar(a=x, b=y)) + return addsub.sum, addsub.sub -def test_wf_5b_exception(tmpdir): - """set_output used twice with the same name - exception should be raised""" - wf = Workflow(name="wf_5", input_spec=["x", "y"], x=3, y=2) - wf.add(fun_addsubvar(name="addsub", a=wf.lzin.x, b=wf.lzin.y)) - wf.set_output([("out", wf.addsub.lzout.sum)]) - wf.cache_dir = tmpdir + worky = Worky(x=3, y=2) + outputs = worky(worker=plugin, cache_dir=tmp_path) - with pytest.raises(Exception, match="are already set"): - wf.set_output([("out", wf.addsub.lzout.sub)]) + assert 5 == outputs.out_sum + assert 1 == outputs.out_sub -def test_wf_6(plugin, tmpdir): - """wf with two tasks and two outputs connected to both tasks, +def test_wf_6(plugin, tmp_path): + """worky with two tasks and two outputs connected to both tasks, one set_output """ - wf = Workflow(name="wf_6", input_spec=["x", "y"], x=2, y=3) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) - wf.set_output([("out1", wf.mult.lzout.out), ("out2", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define(outputs=["out1", "out2"]) + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") + return mult.out, add2.out # - assert wf.output_dir.exists() - results = wf.result() - assert 6 == results.output.out1 - assert 8 == results.output.out2 + worky = Worky(x=2, y=3) + outputs = worky(worker=plugin, cache_dir=tmp_path) -def test_wf_6a(plugin, tmpdir): - """wf with two tasks and two outputs connected to both tasks, + assert 6 == outputs.out1 + assert 8 == outputs.out2 + + +def test_wf_6a(plugin, tmp_path): + """worky with two tasks and two outputs connected to both tasks, set_output used twice """ - wf = Workflow(name="wf_6", input_spec=["x", "y"], x=2, y=3) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) - 
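# --- Illustrative sketch (not part of the patch): naming multiple workflow outputs.
# The old wf.set_output([...]) calls are replaced by listing names in
# @workflow.define(outputs=[...]) and returning the matching values in that order.
# Add2/Multiply are the helper tasks imported at the top of this module; the
# workflow name is hypothetical.
from pydra.design import workflow


@workflow.define(outputs=["out_add2", "out_mult"])
def MultiOut(x, y):
    mult = workflow.add(Multiply(x=x, y=y), name="mult")
    add2 = workflow.add(Add2(x=mult.out), name="add2")
    return add2.out, mult.out  # order matches the outputs= list


outputs = MultiOut(x=2, y=3)()
assert outputs.out_mult == 6
assert outputs.out_add2 == 8
# --- end sketch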
wf.set_output([("out1", wf.mult.lzout.out)]) - wf.set_output([("out2", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define(outputs=["out1", "out2"]) + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") + return mult.out, add2.out + + worky = Worky(x=2, y=3) + + outputs = worky(worker=plugin, cache_dir=tmp_path) + + assert 6 == outputs.out1 + assert 8 == outputs.out2 - assert wf.output_dir.exists() - results = wf.result() - assert 6 == results.output.out1 - assert 8 == results.output.out2 +def test_wf_st_1(plugin, tmp_path): + """Worky with one task, a splitter for the workflow""" -def test_wf_st_1(plugin, tmpdir): - """Workflow with one task, a splitter for the workflow""" - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) + @workflow.define + def Worky(x): + add2 = workflow.add(Add2(x=x).split("x", x=x), name="add2") - wf.split("x", x=[1, 2]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + return add2.out - checksum_before = wf.checksum - with Submitter(plugin="serial") as sub: - sub(wf) + worky = Worky(x=[1, 2]) + + checksum_before = worky._hash + outputs = worky(cache_dir=tmp_path, plugin=plugin) + + Workflow.construct(worky) + assert worky._hash == checksum_before - assert wf.checksum == checksum_before - results = wf.result() # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results[0].output.out == 3 - assert results[1].output.out == 4 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert outputs.out[0] == 3 + assert outputs.out[1] == 4 -def test_wf_st_1_call_subm(plugin, tmpdir): - """Workflow with one task, a splitter for the workflow""" - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) +def test_wf_st_1_call_subm(plugin, tmp_path): + """Worky with one task, a splitter for the workflow""" - wf.split("x", x=[1, 2]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Worky(x): + add2 = workflow.add(Add2(x=x).split("x", x=x), name="add2") - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) + return add2.out + + worky = Worky(x=[1, 2]) + + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results[0].output.out == 3 - assert results[1].output.out == 4 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert outputs.out[0] == 3 + assert outputs.out[1] == 4 -def test_wf_st_1_call_plug(plugin, tmpdir): - """Workflow with one task, a splitter for the workflow - using Workflow.__call__(plugin) +def test_wf_st_1_call_plug(plugin, tmp_path): + """Worky with one task, a splitter for the workflow + using Worky.__call__(plugin) """ - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) - wf.split("x", x=[1, 2]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Worky(x): + add2 = workflow.add(Add2(x=x).split("x", x=x), name="add2") - wf(plugin=plugin) + return add2.out + + worky = Worky(x=[1, 2]) + + outputs = worky(plugin=plugin) - results = wf.result() # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results[0].output.out == 3 - assert results[1].output.out == 4 - # checking all directories - 
assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert outputs.out[0] == 3 + assert outputs.out[1] == 4 -def test_wf_st_1_call_selfplug(plugin, tmpdir): - """Workflow with one task, a splitter for the workflow - using Workflow.__call__() and using self.plugin +def test_wf_st_1_call_selfplug(plugin, tmp_path): + """Worky with one task, a splitter for the workflow + using Worky.__call__() and using self.plugin """ - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) - wf.split("x", x=[1, 2]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.plugin = plugin - wf.cache_dir = tmpdir + @workflow.define + def Worky(x): + add2 = workflow.add(Add2(x=x).split("x", x=x), name="add2") + return add2.out + + worky = Worky(x=[1, 2]) + + outputs = worky() - wf() - results = wf.result() # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results[0].output.out == 3 - assert results[1].output.out == 4 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert outputs.out[0] == 3 + assert outputs.out[1] == 4 -def test_wf_st_1_call_noplug_nosubm(plugin, tmpdir): - """Workflow with one task, a splitter for the workflow - using Workflow.__call__() without plugin and submitter +def test_wf_st_1_call_noplug_nosubm(plugin, tmp_path): + """Worky with one task, a splitter for the workflow + using Worky.__call__() without plugin and submitter (a submitter should be created within the __call__ function) """ - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) - wf.split("x", x=[1, 2]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Worky(x): + add2 = workflow.add(Add2(x=x).split("x", x=x), name="add2") + return add2.out + + worky = Worky(x=[1, 2]) + + outputs = worky() - wf() - results = wf.result() # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results[0].output.out == 3 - assert results[1].output.out == 4 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert outputs.out[0] == 3 + assert outputs.out[1] == 4 -def test_wf_st_1_inp_in_call(tmpdir): +def test_wf_st_1_inp_in_call(tmp_path, plugin): """Defining input in __call__""" - wf = Workflow(name="wf_spl_1", input_spec=["x"], cache_dir=tmpdir).split( - "x", x=[1, 2] - ) - wf.add(add2(name="add2", x=wf.lzin.x)) - wf.set_output([("out", wf.add2.lzout.out)]) - results = wf() - assert results[0].output.out == 3 - assert results[1].output.out == 4 + @workflow.define + def Worky(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out + + worky = Worky().split("x", x=[1, 2]) + outputs = worky(cache_dir=tmp_path, plugin=plugin) # + assert outputs.out[0] == 3 + assert outputs.out[1] == 4 -def test_wf_st_1_upd_inp_call(tmpdir): + +def test_wf_st_1_upd_inp_call(tmp_path, plugin): """Updating input in __call___""" - wf = Workflow(name="wf_spl_1", input_spec=["x"], cache_dir=tmpdir).split( - "x", x=[11, 22] - ) - wf.add(add2(name="add2", x=wf.lzin.x)) - wf.set_output([("out", wf.add2.lzout.out)]) - results = wf(x=[1, 2]) - assert results[0].output.out == 3 - assert results[1].output.out == 4 - - -def test_wf_st_noinput_1(plugin, tmpdir): - """Workflow with one task, a splitter for the workflow""" - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) - - wf.split("x", x=[]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.plugin = plugin - 
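# --- Illustrative sketch (not part of the patch): splitting over a workflow input.
# Instead of wf.split(...) on a mutable Workflow object, .split()/.combine() are
# chained on the instantiated definition and the split values are passed to .split()
# itself; the corresponding outputs are then collected into lists, one entry per
# split element. Add2 is the helper task imported at the top of this module.
from pydra.design import workflow


@workflow.define
def AddTwoWf(x):
    add2 = workflow.add(Add2(x=x), name="add2")
    return add2.out


outputs = AddTwoWf().split("x", x=[1, 2])()
assert outputs.out == [3, 4]

# A combiner is chained the same way, e.g.
# AddTwoWf().split("x", x=[1, 2]).combine(combiner="x"), as in test_wf_st_2 below.
# --- end sketch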
wf.cache_dir = tmpdir - - checksum_before = wf.checksum - with Submitter(plugin=plugin) as sub: - sub(wf) - - assert wf.checksum == checksum_before - results = wf.result() - assert results == [] - # checking all directories - assert wf.output_dir == [] + + @workflow.define + def Worky(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out + + worky = Worky().split("x", x=[11, 22]) + outputs = worky(cache_dir=tmp_path, plugin=plugin) # x=[1, 2] + assert outputs.out[0] == 3 + assert outputs.out[1] == 4 -def test_wf_ndst_1(plugin, tmpdir): +def test_wf_st_noinput_1(plugin, tmp_path): + """Worky with one task, a splitter for the workflow""" + + @workflow.define + def Worky(x): + add2 = workflow.add(Add2(x=x).split("x", x=x), name="add2") + return add2.out + + worky = Worky(x=[]) + + checksum_before = worky._hash + outputs = worky(worker=plugin, cache_dir=tmp_path) + + wf = Workflow.construct(worky) + assert worky._hash == checksum_before + + assert outputs.out == [] + + +def test_wf_ndst_1(plugin, tmp_path): """workflow with one task, a splitter on the task level""" - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2").split("x", x=wf.lzin.x)) - wf.inputs.x = [1, 2] - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - - checksum_before = wf.checksum - with Submitter(plugin=plugin) as sub: - sub(wf) - - assert wf.checksum == checksum_before - results = wf.result() + + @workflow.define + def Worky(x): + add2 = workflow.add(Add2().split("x", x=x), name="add2") + return add2.out + + worky = Worky(x=[1, 2]) + + checksum_before = worky._hash + outputs = worky(worker=plugin, cache_dir=tmp_path) + + wf = Workflow.construct(worky) + assert worky._hash == checksum_before + # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results.output.out == [3, 4] - assert wf.output_dir.exists() + assert outputs.out == [3, 4] -def test_wf_ndst_updatespl_1(plugin, tmpdir): +def test_wf_ndst_updatespl_1(plugin, tmp_path): """workflow with one task, a splitter on the task level is added *after* calling add """ - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2")) - wf.inputs.x = [1, 2] - wf.add2.split("x", x=wf.lzin.x) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x): + add2 = workflow.add(Add2().split("x", x=x), name="add2") + return add2.out - results = wf.result() - # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results.output.out == [3, 4] - assert wf.output_dir.exists() + worky = Worky(x=[1, 2]) - assert wf.output_dir.exists() + outputs = worky(worker=plugin, cache_dir=tmp_path) + + # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] + assert outputs.out == [3, 4] -def test_wf_ndst_updatespl_1a(plugin, tmpdir): +def test_wf_ndst_updatespl_1a(plugin, tmp_path): """workflow with one task (initialize before calling add), a splitter on the task level is added *after* calling add """ - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - task_add2 = add2(name="add2", x=wf.lzin.x) - wf.add(task_add2) - task_add2.split("x", x=[1, 2]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x): + add2 = workflow.add(Add2().split("x", x=x), name="add2") + return add2.out - results = wf.result() - # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results.output.out == [3, 4] - assert 
wf.output_dir.exists() + worky = Worky(x=[1, 2]) - assert wf.output_dir.exists() + outputs = worky(worker=plugin, cache_dir=tmp_path) + + # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] + assert outputs.out == [3, 4] -def test_wf_ndst_updateinp_1(plugin, tmpdir): +def test_wf_ndst_updateinp_1(plugin, tmp_path): """workflow with one task, a splitter on the task level, updating input of the task after calling add """ - wf = Workflow(name="wf_spl_1", input_spec=["x", "y"]) - wf.add(add2(name="add2", x=wf.lzin.x)) - wf.inputs.x = [1, 2] - wf.inputs.y = [11, 12] - wf.add2.split("x", x=wf.lzin.y) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x, y): + add2 = workflow.add(Add2().split("x", x=y), name="add2") + return add2.out + + worky = Worky(x=[1, 2], y=[11, 12]) - results = wf.result() - assert results.output.out == [13, 14] - assert wf.output_dir.exists() + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert wf.output_dir.exists() + assert outputs.out == [13, 14] -def test_wf_ndst_noinput_1(plugin, tmpdir): +def test_wf_ndst_noinput_1(plugin, tmp_path): """workflow with one task, a splitter on the task level""" - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2").split("x", x=wf.lzin.x)) - wf.inputs.x = [] - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - checksum_before = wf.checksum - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x): + add2 = workflow.add(Add2().split("x", x=x), name="add2") + return add2.out + + worky = Worky(x=[]) + + checksum_before = worky._hash + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert wf.checksum == checksum_before - results = wf.result() + wf = Workflow.construct(worky) + assert worky._hash == checksum_before - assert results.output.out == [] - assert wf.output_dir.exists() + assert outputs.out == [] -def test_wf_st_2(plugin, tmpdir): +def test_wf_st_2(plugin, tmp_path): """workflow with one task, splitters and combiner for workflow""" - wf = Workflow(name="wf_st_2", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) - wf.split("x", x=[1, 2]).combine(combiner="x") - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Worky(x): + add2 = workflow.add(Add2(x=x), name="add2") - with Submitter(plugin=plugin) as sub: - sub(wf) + return add2.out + + worky = Worky().split("x", x=[1, 2]).combine(combiner="x") + + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results[0].output.out == 3 - assert results[1].output.out == 4 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert outputs.out[0] == 3 + assert outputs.out[1] == 4 -def test_wf_ndst_2(plugin, tmpdir): +def test_wf_ndst_2(plugin, tmp_path): """workflow with one task, splitters and combiner on the task level""" - wf = Workflow(name="wf_ndst_2", input_spec=["x"]) - wf.add(add2(name="add2").split("x", x=wf.lzin.x).combine(combiner="x")) - wf.inputs.x = [1, 2] - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x): + add2 = workflow.add(Add2().split("x", x=x).combine(combiner="x"), name="add2") + return add2.out + + worky = Worky(x=[1, 2]) + + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = 
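# --- Illustrative sketch (not part of the patch): a splitter on the task (node) level,
# plus the definition-hash checks these tests rely on. Add2 is the helper task imported
# at the top of this module; worker=/cache_dir= may be passed to the call as in the tests.
from pydra.design import workflow
from pydra.engine.core import Workflow


@workflow.define
def NodeSplit(x):
    add2 = workflow.add(Add2().split("x", x=x), name="add2")
    return add2.out


worky = NodeSplit(x=[1, 2])
checksum_before = worky._hash            # hash of the (not yet run) definition
outputs = worky()
assert outputs.out == [3, 4]

wf = Workflow.construct(worky)           # build the node graph without executing it
assert worky._hash == checksum_before    # execution must not mutate the definition
# --- end sketch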
wf.result() # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results.output.out == [3, 4] - assert wf.output_dir.exists() + assert outputs.out == [3, 4] # workflows with structures A -> B -def test_wf_st_3(plugin, tmpdir): - """workflow with 2 tasks, splitter on wf level""" - wf = Workflow(name="wfst_3", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) - wf.split(("x", "y"), x=[1, 2], y=[11, 12]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir +def test_wf_st_3(plugin, tmp_path): + """workflow with 2 tasks, splitter on worky level""" + + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") - with Submitter(plugin=plugin) as sub: - sub(wf) + return add2.out + + worky = Worky().split(("x", "y"), x=[1, 2], y=[11, 12]) + + outputs = worky(worker=plugin, cache_dir=tmp_path) expected = [ ({"wfst_3.x": 1, "wfst_3.y": 11}, 13), @@ -870,829 +687,683 @@ def test_wf_st_3(plugin, tmpdir): ({"wfst_3.x": 1, "wfst_3.y": 1}, 26), ] - results = wf.result() for i, res in enumerate(expected): - assert results[i].output.out == res[1] + assert outputs.out[i] == res[1] - # checking the return_inputs option, either return_inputs is True or "val", - # it should give values of inputs that corresponds to the specific element - results_verb = wf.result(return_inputs=True) - results_verb_val = wf.result(return_inputs="val") - for i, res in enumerate(expected): - assert (results_verb[i][0], results_verb[i][1].output.out) == res - assert (results_verb_val[i][0], results_verb_val[i][1].output.out) == res - # checking the return_inputs option return_inputs="ind" - # it should give indices of inputs (instead of values) for each element - results_verb_ind = wf.result(return_inputs="ind") - for i, res in enumerate(expected_ind): - assert (results_verb_ind[i][0], results_verb_ind[i][1].output.out) == res +def test_wf_ndst_3(plugin, tmp_path): + """Test workflow with 2 tasks, splitter on a task level""" - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply().split(("x", "y"), x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") + return add2.out + worky = Worky(x=[1, 2], y=[11, 12]) + + outputs = worky(worker=plugin, cache_dir=tmp_path) -def test_wf_ndst_3(plugin, tmpdir): - """Test workflow with 2 tasks, splitter on a task level""" - wf = Workflow(name="wf_ndst_3", input_spec=["x", "y"]) - wf.add(multiply(name="mult").split(("x", "y"), x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) - wf.inputs.x = [1, 2] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - - with Submitter(plugin=plugin) as sub: - sub(wf) - - results = wf.result() # expected: [({"test7.x": 1, "test7.y": 11}, 13), ({"test7.x": 2, "test.y": 12}, 26)] - assert results.output.out == [13, 26] - # checking the output directory - assert wf.output_dir.exists() + assert outputs.out == [13, 26] -def test_wf_st_4(plugin, tmpdir): +def test_wf_st_4(plugin, tmp_path): """workflow with two tasks, scalar splitter and combiner for the workflow""" - wf = Workflow(name="wf_st_4", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) - wf.split(("x", "y"), x=[1, 2], y=[11, 12]) - 
wf.combine("x") - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") + + return add2.out - with Submitter(plugin=plugin) as sub: - sub(wf) + worky = Worky().split(("x", "y"), x=[1, 2], y=[11, 12]).combine("x") + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() # expected: [ # ({"test7.x": 1, "test7.y": 11}, 13), ({"test7.x": 2, "test.y": 12}, 26) # ] - assert results[0].output.out == 13 - assert results[1].output.out == 26 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert outputs.out[0] == 13 + assert outputs.out[1] == 26 -def test_wf_ndst_4(plugin, tmpdir): +def test_wf_ndst_4(plugin, tmp_path): """workflow with two tasks, scalar splitter and combiner on tasks level""" - wf = Workflow(name="wf_ndst_4", input_spec=["a", "b"]) - wf.add(multiply(name="mult").split(("x", "y"), x=wf.lzin.a, y=wf.lzin.b)) - wf.add(add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - wf.inputs.a = [1, 2] - wf.inputs.b = [11, 12] + @workflow.define + def Worky(a, b): + mult = workflow.add(Multiply().split(("x", "y"), x=a, y=b), name="mult") + add2 = workflow.add(Add2(x=mult.out).combine("mult.x"), name="add2") + + return add2.out + + worky = Worky(a=[1, 2], b=[11, 12]) - with Submitter(plugin=plugin) as sub: - sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() # expected: [ # ({"test7.x": 1, "test7.y": 11}, 13), ({"test7.x": 2, "test.y": 12}, 26) # ] - assert results.output.out == [13, 26] - # checking the output directory - assert wf.output_dir.exists() + assert outputs.out == [13, 26] -def test_wf_st_5(plugin, tmpdir): +def test_wf_st_5(plugin, tmp_path): """workflow with two tasks, outer splitter and no combiner""" - wf = Workflow(name="wf_st_5", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) - - wf.split(["x", "y"], x=[1, 2], y=[11, 12]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - - with Submitter(plugin=plugin) as sub: - sub(wf) - - results = wf.result() - assert results[0].output.out == 13 - assert results[1].output.out == 14 - assert results[2].output.out == 24 - assert results[3].output.out == 26 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out).split(["x", "y"], x=x, y=y), name="add2") + + return add2.out + + worky = Worky(x=[1, 2], y=[11, 12]) + + outputs = worky(worker=plugin, cache_dir=tmp_path) + + assert outputs.out[0] == 13 + assert outputs.out[1] == 14 + assert outputs.out[2] == 24 + assert outputs.out[3] == 26 -def test_wf_ndst_5(plugin, tmpdir): + +def test_wf_ndst_5(plugin, tmp_path): """workflow with two tasks, outer splitter on tasks level and no combiner""" - wf = Workflow(name="wf_ndst_5", input_spec=["x", "y"]) - wf.add(multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) - wf.inputs.x = [1, 2] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - - with Submitter(plugin=plugin) as sub: - sub(wf) - - results = wf.result() - assert 
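# --- Illustrative sketch (not part of the patch): scalar vs outer node-level splitters
# and a combiner that names an upstream node's field. A scalar splitter
# .split(("x", "y"), ...) pairs the two lists elementwise, while .split(["x", "y"], ...)
# forms the outer product; combining over "mult.x" collapses that dimension again.
# Multiply/Add2 are the helper tasks imported at the top of this module.
from pydra.design import workflow


@workflow.define
def ScalarSplit(a, b):
    mult = workflow.add(Multiply().split(("x", "y"), x=a, y=b), name="mult")
    add2 = workflow.add(Add2(x=mult.out).combine("mult.x"), name="add2")
    return add2.out


outputs = ScalarSplit(a=[1, 2], b=[11, 12])()
assert outputs.out == [13, 26]   # (1*11)+2 and (2*12)+2, as in test_wf_ndst_4
# --- end sketch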
results.output.out[0] == 13 - assert results.output.out[1] == 14 - assert results.output.out[2] == 24 - assert results.output.out[3] == 26 - # checking the output directory - assert wf.output_dir.exists() - - -def test_wf_st_6(plugin, tmpdir): + + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") + return add2.out + + worky = Worky(x=[1, 2], y=[11, 12]) + + outputs = worky(worker=plugin, cache_dir=tmp_path) + + assert outputs.out[0] == 13 + assert outputs.out[1] == 14 + assert outputs.out[2] == 24 + assert outputs.out[3] == 26 + + +def test_wf_st_6(plugin, tmp_path): """workflow with two tasks, outer splitter and combiner for the workflow""" - wf = Workflow(name="wf_st_6", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) - - wf.split(["x", "y"], x=[1, 2, 3], y=[11, 12]) - wf.combine("x") - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - - with Submitter(plugin=plugin) as sub: - sub(wf) - - results = wf.result() - assert results[0][0].output.out == 13 - assert results[0][1].output.out == 24 - assert results[0][2].output.out == 35 - assert results[1][0].output.out == 14 - assert results[1][1].output.out == 26 - assert results[1][2].output.out == 38 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") + + return add2.out + + worky = Worky().split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("x") -def test_wf_ndst_6(plugin, tmpdir): + outputs = worky(worker=plugin, cache_dir=tmp_path) + + assert outputs.out[0][0] == 13 + assert outputs.out[0][1] == 24 + assert outputs.out[0][2] == 35 + assert outputs.out[1][0] == 14 + assert outputs.out[1][1] == 26 + assert outputs.out[1][2] == 38 + + +def test_wf_ndst_6(plugin, tmp_path): """workflow with two tasks, outer splitter and combiner on tasks level""" - wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) - wf.add(multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out).combine("mult.x"), name="add2") + return add2.out - results = wf.result() - assert results.output.out[0] == [13, 24, 35] - assert results.output.out[1] == [14, 26, 38] + worky = Worky(x=[1, 2, 3], y=[11, 12]) - # checking the output directory - assert wf.output_dir.exists() + outputs = worky(worker=plugin, cache_dir=tmp_path) + assert outputs.out == [[13, 24, 35], [14, 26, 38]] -def test_wf_ndst_7(plugin, tmpdir): + +def test_wf_ndst_7(plugin, tmp_path): """workflow with two tasks, outer splitter and (full) combiner for first node only""" - wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) - wf.add(multiply(name="mult").split("x", x=wf.lzin.x, y=wf.lzin.y).combine("x")) - wf.add(identity(name="iden", x=wf.mult.lzout.out)) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = 11 - wf.set_output([("out", wf.iden.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - 
sub(wf) + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply(y=y).split(x=x).combine("x"), name="mult") + iden = workflow.add(Identity(x=mult.out)) + return iden.out + + worky = Worky(x=[1, 2, 3], y=11) - results = wf.result() - assert results.output.out == [11, 22, 33] + outputs = worky(worker=plugin, cache_dir=tmp_path) - # checking the output directory - assert wf.output_dir.exists() + assert outputs.out == [11, 22, 33] -def test_wf_ndst_8(plugin, tmpdir): +def test_wf_ndst_8(plugin, tmp_path): """workflow with two tasks, outer splitter and (partial) combiner for first task only""" - wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) - wf.add( - multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y).combine("x") - ) - wf.add(identity(name="iden", x=wf.mult.lzout.out)) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.iden.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x, y): + mult = workflow.add( + Multiply().split(["x", "y"], x=x, y=y).combine("x"), name="mult" + ) + iden = workflow.add(Identity(x=mult.out)) + return iden.out + + worky = Worky(x=[1, 2, 3], y=[11, 12]) - results = wf.result() - assert results.output.out[0] == [11, 22, 33] - assert results.output.out[1] == [12, 24, 36] + outputs = worky(worker=plugin, cache_dir=tmp_path) - # checking the output directory - assert wf.output_dir.exists() + assert outputs.out == [[11, 22, 33], [12, 24, 36]] -def test_wf_ndst_9(plugin, tmpdir): +def test_wf_ndst_9(plugin, tmp_path): """workflow with two tasks, outer splitter and (full) combiner for first task only""" - wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) - wf.add( - multiply(name="mult") - .split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y) - .combine(["x", "y"]) - ) - wf.add(identity(name="iden", x=wf.mult.lzout.out)) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.iden.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x, y): + mult = workflow.add( + Multiply().split(["x", "y"], x=x, y=y).combine(["x", "y"]), name="mult" + ) + iden = workflow.add(Identity(x=mult.out)) + return iden.out + + worky = Worky(x=[1, 2, 3], y=[11, 12]) - results = wf.result() - assert results.output.out == [11, 12, 22, 24, 33, 36] + outputs = worky(worker=plugin, cache_dir=tmp_path) - # checking the output directory - assert wf.output_dir.exists() + assert outputs.out == [11, 12, 22, 24, 33, 36] # workflows with structures A -> B -> C -def test_wf_3sernd_ndst_1(plugin, tmpdir): +def test_wf_3sernd_ndst_1(plugin, tmp_path): """workflow with three "serial" tasks, checking if the splitter is propagating""" - wf = Workflow(name="wf_3sernd_ndst_1", input_spec=["x", "y"]) - wf.add(multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2_1st", x=wf.mult.lzout.out)) - wf.add(add2(name="add2_2nd", x=wf.add2_1st.lzout.out)) - wf.inputs.x = [1, 2] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.add2_2nd.lzout.out)]) - wf.cache_dir = tmpdir - - with Submitter(plugin=plugin) as sub: - sub(wf) + + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y), name="mult") + add2_1st = workflow.add(Add2(x=mult.out), name="add2_1st") + add2_2nd = workflow.add(Add2(x=add2_1st.out), name="add2_2nd") + return add2_2nd.out + + worky = Worky(x=[1, 2], y=[11, 12]) + + outputs = worky(worker=plugin, cache_dir=tmp_path) # 
splitter from the first task should propagate to all tasks, # splitter_rpn should be the same in all tasks - assert wf.mult.state.splitter == ["mult.x", "mult.y"] - assert wf.add2_1st.state.splitter == "_mult" - assert wf.add2_2nd.state.splitter == "_add2_1st" + wf = Workflow.construct(worky) + assert wf["mult"].state.splitter == ["mult.x", "mult.y"] + assert wf["add2_1st"].state.splitter == "_mult" + assert wf["add2_2nd"].state.splitter == "_add2_1st" assert ( ["mult.x", "mult.y", "*"] - == wf.mult.state.splitter_rpn - == wf.add2_1st.state.splitter_rpn - == wf.add2_2nd.state.splitter_rpn + == wf["mult"].state.splitter_rpn + == wf["add2_1st"].state.splitter_rpn + == wf["add2_2nd"].state.splitter_rpn ) - results = wf.result() - assert results.output.out[0] == 15 - assert results.output.out[1] == 16 - assert results.output.out[2] == 26 - assert results.output.out[3] == 28 - # checking the output directory - assert wf.output_dir.exists() + assert outputs.out == [15, 16, 26, 28] -def test_wf_3sernd_ndst_1a(plugin, tmpdir): +def test_wf_3sernd_ndst_1a(plugin, tmp_path): """ workflow with three "serial" tasks, checking if the splitter is propagating first task has a splitter that propagates to the 2nd task, and the 2nd task is adding one more input to the splitter """ - wf = Workflow(name="wf_3sernd_ndst_1", input_spec=["x", "y"]) - wf.add(add2(name="add2_1st").split("x", x=wf.lzin.x)) - wf.add(multiply(name="mult", x=wf.add2_1st.lzout.out).split("y", y=wf.lzin.y)) - wf.add(add2(name="add2_2nd", x=wf.mult.lzout.out)) - wf.inputs.x = [1, 2] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.add2_2nd.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x, y): + add2_1st = workflow.add(Add2().split("x", x=x), name="add2_1st") + mult = workflow.add(Multiply(x=add2_1st.out).split("y", y=y), name="mult") + add2_2nd = workflow.add(Add2(x=mult.out), name="add2_2nd") + return add2_2nd.out + + worky = Worky(x=[1, 2], y=[11, 12]) + + outputs = worky(worker=plugin, cache_dir=tmp_path) # splitter from the 1st task should propagate and the 2nd task should add one more # splitter_rpn for the 2nd and the 3rd task should be the same - assert wf.add2_1st.state.splitter == "add2_1st.x" - assert wf.mult.state.splitter == ["_add2_1st", "mult.y"] - assert wf.add2_2nd.state.splitter == "_mult" + wf = Workflow.construct(worky) + assert wf["add2_1st"].state.splitter == "add2_1st.x" + assert wf["mult"].state.splitter == ["_add2_1st", "mult.y"] + assert wf["add2_2nd"].state.splitter == "_mult" assert ( ["add2_1st.x", "mult.y", "*"] - == wf.mult.state.splitter_rpn - == wf.add2_2nd.state.splitter_rpn + == wf["mult"].state.splitter_rpn + == wf["add2_2nd"].state.splitter_rpn ) - results = wf.result() - assert results.output.out[0] == 35 - assert results.output.out[1] == 38 - assert results.output.out[2] == 46 - assert results.output.out[3] == 50 - # checking the output directory - assert wf.output_dir.exists() + assert outputs.out == [35, 38, 46, 50] # workflows with structures A -> C, B -> C @pytest.mark.flaky(reruns=3) # when dask -def test_wf_3nd_st_1(plugin_dask_opt, tmpdir): +def test_wf_3nd_st_1(plugin_dask_opt, tmp_path): """workflow with three tasks, third one connected to two previous tasks, splitter on the workflow level """ - wf = Workflow(name="wf_st_7", input_spec=["x", "y"]) - wf.add(add2(name="add2x", x=wf.lzin.x)) - wf.add(add2(name="add2y", x=wf.lzin.y)) - wf.add(multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) - 
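# --- Illustrative sketch (not part of the patch): inspecting splitter propagation.
# Nodes of a constructed (but unexecuted) workflow are looked up by name with
# wf["<node>"] and expose their state, mirroring the assertions in
# test_wf_3sernd_ndst_1 above. Multiply/Add2 are the helper tasks imported at the
# top of this module; the workflow name is hypothetical.
from pydra.design import workflow
from pydra.engine.core import Workflow


@workflow.define
def Propagate(x, y):
    mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y), name="mult")
    add2 = workflow.add(Add2(x=mult.out), name="add2")
    return add2.out


wf = Workflow.construct(Propagate(x=[1, 2], y=[11, 12]))
assert wf["mult"].state.splitter == ["mult.x", "mult.y"]
assert wf["add2"].state.splitter == "_mult"   # splitter inherited from "mult"
# --- end sketch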
wf.split(["x", "y"], x=[1, 2, 3], y=[11, 12]) - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Worky(x, y): + add2x = workflow.add(Add2(x=x), name="add2x") + add2y = workflow.add(Add2(x=y), name="add2y") + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out), name="mult") - with Submitter(plugin=plugin_dask_opt) as sub: - sub(wf) + return mult.out - results = wf.result() - assert len(results) == 6 - assert results[0].output.out == 39 - assert results[1].output.out == 42 - assert results[5].output.out == 70 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + worky = Worky().split(["x", "y"], x=[1, 2, 3], y=[11, 12]) + + outputs = worky(worker=plugin_dask_opt, cache_dir=tmp_path) + + assert outputs.out[0] == 39 + assert outputs.out[1] == 42 + assert outputs.out[5] == 70 @pytest.mark.flaky(reruns=3) # when dask -def test_wf_3nd_ndst_1(plugin_dask_opt, tmpdir): +def test_wf_3nd_ndst_1(plugin_dask_opt, tmp_path): """workflow with three tasks, third one connected to two previous tasks, splitter on the tasks levels """ - wf = Workflow(name="wf_ndst_7", input_spec=["x", "y"]) - wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(add2(name="add2y").split("x", x=wf.lzin.y)) - wf.add(multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin_dask_opt) as sub: - sub(wf) + @workflow.define + def Worky(x, y): + add2x = workflow.add(Add2().split("x", x=x), name="add2x") + add2y = workflow.add(Add2().split("x", x=y), name="add2y") + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out), name="mult") + return mult.out + + worky = Worky(x=[1, 2, 3], y=[11, 12]) + + outputs = worky(worker=plugin_dask_opt, cache_dir=tmp_path) - results = wf.result() - assert len(results.output.out) == 6 - assert results.output.out == [39, 42, 52, 56, 65, 70] - # checking the output directory - assert wf.output_dir.exists() + assert len(outputs.out) == 6 + assert outputs.out == [39, 42, 52, 56, 65, 70] -def test_wf_3nd_st_2(plugin, tmpdir): +def test_wf_3nd_st_2(plugin, tmp_path): """workflow with three tasks, third one connected to two previous tasks, splitter and partial combiner on the workflow level """ - wf = Workflow(name="wf_st_8", input_spec=["x", "y"]) - wf.add(add2(name="add2x", x=wf.lzin.x)) - wf.add(add2(name="add2y", x=wf.lzin.y)) - wf.add(multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) - wf.split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("x") - - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir - - with Submitter(plugin=plugin) as sub: - sub(wf) - - results = wf.result() - assert len(results) == 2 - assert results[0][0].output.out == 39 - assert results[0][1].output.out == 52 - assert results[0][2].output.out == 65 - assert results[1][0].output.out == 42 - assert results[1][1].output.out == 56 - assert results[1][2].output.out == 70 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + @workflow.define + def Worky(x, y): + add2x = workflow.add(Add2(x=x), name="add2x") + add2y = workflow.add(Add2(x=y), name="add2y") + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out), name="mult") + return mult.out + + worky = Worky().split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("x") + + outputs = worky(worker=plugin, cache_dir=tmp_path) -def 
test_wf_3nd_ndst_2(plugin, tmpdir): + assert outputs.out[0][0] == 39 + assert outputs.out[0][1] == 52 + assert outputs.out[0][2] == 65 + assert outputs.out[1][0] == 42 + assert outputs.out[1][1] == 56 + assert outputs.out[1][2] == 70 + + +def test_wf_3nd_ndst_2(plugin, tmp_path): """workflow with three tasks, third one connected to two previous tasks, splitter and partial combiner on the tasks levels """ - wf = Workflow(name="wf_ndst_8", input_spec=["x", "y"]) - wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(add2(name="add2y").split("x", x=wf.lzin.y)) - wf.add( - multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).combine( - "add2x.x" + + @workflow.define + def Worky(x, y): + add2x = workflow.add(Add2().split("x", x=x), name="add2x") + add2y = workflow.add(Add2().split("x", x=y), name="add2y") + mult = workflow.add( + Multiply(x=add2x.out, y=add2y.out).combine("add2x.x"), name="mult" ) - ) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir + return mult.out + + worky = Worky(x=[1, 2, 3], y=[11, 12]) - with Submitter(plugin="serial") as sub: - sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() - assert len(results.output.out) == 2 - assert results.output.out[0] == [39, 52, 65] - assert results.output.out[1] == [42, 56, 70] - # checking the output directory - assert wf.output_dir.exists() + assert len(outputs.out) == 2 + assert outputs.out[0] == [39, 52, 65] + assert outputs.out[1] == [42, 56, 70] -def test_wf_3nd_st_3(plugin, tmpdir): +def test_wf_3nd_st_3(plugin, tmp_path): """workflow with three tasks, third one connected to two previous tasks, splitter and partial combiner (from the second task) on the workflow level """ - wf = Workflow(name="wf_st_9", input_spec=["x", "y"]) - wf.add(add2(name="add2x", x=wf.lzin.x)) - wf.add(add2(name="add2y", x=wf.lzin.y)) - wf.add(multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) - wf.split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("y") - - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir - - with Submitter(plugin=plugin) as sub: - sub(wf) - - results = wf.result() - assert len(results) == 3 - assert results[0][0].output.out == 39 - assert results[0][1].output.out == 42 - assert results[1][0].output.out == 52 - assert results[1][1].output.out == 56 - assert results[2][0].output.out == 65 - assert results[2][1].output.out == 70 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + @workflow.define + def Worky(x, y): + add2x = workflow.add(Add2(x=x), name="add2x") + add2y = workflow.add(Add2(x=y), name="add2y") + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out), name="mult") + return mult.out + + worky = Worky().split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("y") + + outputs = worky(worker=plugin, cache_dir=tmp_path) -def test_wf_3nd_ndst_3(plugin, tmpdir): + assert outputs.out[0][0] == 39 + assert outputs.out[0][1] == 42 + assert outputs.out[1][0] == 52 + assert outputs.out[1][1] == 56 + assert outputs.out[2][0] == 65 + assert outputs.out[2][1] == 70 + + +def test_wf_3nd_ndst_3(plugin, tmp_path): """workflow with three tasks, third one connected to two previous tasks, splitter and partial combiner (from the second task) on the tasks levels """ - wf = Workflow(name="wf_ndst_9", input_spec=["x", "y"]) - wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(add2(name="add2y").split("x", x=wf.lzin.y)) - wf.add( - 
multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).combine( - "add2y.x" + + @workflow.define + def Worky(x, y): + add2x = workflow.add(Add2().split("x", x=x), name="add2x") + add2y = workflow.add(Add2().split("x", x=y), name="add2y") + mult = workflow.add( + Multiply(x=add2x.out, y=add2y.out).combine("add2y.x"), name="mult" ) - ) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir + return mult.out + + worky = Worky(x=[1, 2, 3], y=[11, 12]) - with Submitter(plugin=plugin) as sub: - sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() - assert len(results.output.out) == 3 - assert results.output.out[0] == [39, 42] - assert results.output.out[1] == [52, 56] - assert results.output.out[2] == [65, 70] - # checking the output directory - assert wf.output_dir.exists() + assert len(outputs.out) == 3 + assert outputs.out[0] == [39, 42] + assert outputs.out[1] == [52, 56] + assert outputs.out[2] == [65, 70] -def test_wf_3nd_st_4(plugin, tmpdir): +def test_wf_3nd_st_4(plugin, tmp_path): """workflow with three tasks, third one connected to two previous tasks, splitter and full combiner on the workflow level """ - wf = Workflow(name="wf_st_10", input_spec=["x", "y"]) - wf.add(add2(name="add2x", x=wf.lzin.x)) - wf.add(add2(name="add2y", x=wf.lzin.y)) - wf.add(multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) - wf.split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine(["x", "y"]) - wf.set_output([("out", wf.mult.lzout.out)]) - wf.plugin = plugin - wf.cache_dir = tmpdir - - with Submitter(plugin=plugin) as sub: - sub(wf) - - results = wf.result() - assert len(results) == 6 - assert results[0].output.out == 39 - assert results[1].output.out == 42 - assert results[2].output.out == 52 - assert results[3].output.out == 56 - assert results[4].output.out == 65 - assert results[5].output.out == 70 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + + @workflow.define + def Worky(x, y): + add2x = workflow.add(Add2(x=x), name="add2x") + add2y = workflow.add(Add2(x=y), name="add2y") + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out), name="mult") + return mult.out + + worky = Worky().split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine(["x", "y"]) + + outputs = worky(worker=plugin, cache_dir=tmp_path) + + assert outputs.out[0] == 39 + assert outputs.out[1] == 42 + assert outputs.out[2] == 52 + assert outputs.out[3] == 56 + assert outputs.out[4] == 65 + assert outputs.out[5] == 70 -def test_wf_3nd_ndst_4(plugin, tmpdir): +def test_wf_3nd_ndst_4(plugin, tmp_path): """workflow with three tasks, third one connected to two previous tasks, splitter and full combiner on the tasks levels """ - wf = Workflow(name="wf_ndst_10", input_spec=["x", "y"]) - wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(add2(name="add2y").split("x", x=wf.lzin.y)) - wf.add( - multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).combine( - ["add2x.x", "add2y.x"] + + @workflow.define + def Worky(x, y): + add2x = workflow.add(Add2().split("x", x=x), name="add2x") + add2y = workflow.add(Add2().split("x", x=y), name="add2y") + mult = workflow.add( + Multiply(x=add2x.out, y=add2y.out).combine(["add2x.x", "add2y.x"]) ) - ) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir + return mult.out + + worky = Worky(x=[1, 2, 3], y=[11, 12]) - with Submitter(plugin=plugin) as sub: - 
sub(wf) - # assert wf.output_dir.exists() - results = wf.result() + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert len(results.output.out) == 6 - assert results.output.out == [39, 42, 52, 56, 65, 70] - # checking the output directory - assert wf.output_dir.exists() + # assert wf["output_dir"].exists() + assert len(outputs.out) == 6 + assert outputs.out == [39, 42, 52, 56, 65, 70] -def test_wf_3nd_st_5(plugin, tmpdir): + +def test_wf_3nd_st_5(plugin, tmp_path): """workflow with three tasks (A->C, B->C) and three fields in the splitter, splitter and partial combiner (from the second task) on the workflow level """ - wf = Workflow(name="wf_st_9", input_spec=["x", "y", "z"]) - wf.add(add2(name="add2x", x=wf.lzin.x)) - wf.add(add2(name="add2y", x=wf.lzin.y)) - wf.add( - fun_addvar3( - name="addvar", a=wf.add2x.lzout.out, b=wf.add2y.lzout.out, c=wf.lzin.z - ) + + @workflow.define + def Worky(x, y, z): + add2x = workflow.add(Add2(x=x), name="add2x") + add2y = workflow.add(Add2(x=y), name="add2y") + addvar = workflow.add(FunAddVar3(a=add2x.out, b=add2y.out, c=z)) + return addvar.out + + worky = ( + Worky().split(["x", "y", "z"], x=[2, 3], y=[11, 12], z=[10, 100]).combine("y") ) - wf.split(["x", "y", "z"], x=[2, 3], y=[11, 12], z=[10, 100]).combine("y") - - wf.set_output([("out", wf.addvar.lzout.out)]) - wf.plugin = plugin - wf.cache_dir = tmpdir - - with Submitter(plugin=plugin) as sub: - sub(wf) - - results = wf.result() - assert len(results) == 4 - assert results[0][0].output.out == 27 - assert results[0][1].output.out == 28 - assert results[1][0].output.out == 117 - assert results[1][1].output.out == 118 - assert results[2][0].output.out == 28 - assert results[2][1].output.out == 29 - assert results[3][0].output.out == 118 - assert results[3][1].output.out == 119 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + outputs = worky(worker=plugin, cache_dir=tmp_path) + + assert outputs.out[0][0] == 27 + assert outputs.out[0][1] == 28 + assert outputs.out[1][0] == 117 + assert outputs.out[1][1] == 118 + assert outputs.out[2][0] == 28 + assert outputs.out[2][1] == 29 + assert outputs.out[3][0] == 118 + assert outputs.out[3][1] == 119 -def test_wf_3nd_ndst_5(plugin, tmpdir): +def test_wf_3nd_ndst_5(plugin, tmp_path): """workflow with three tasks (A->C, B->C) and three fields in the splitter, all tasks have splitters and the last one has a partial combiner (from the 2nd) """ - wf = Workflow(name="wf_st_9", input_spec=["x", "y", "z"]) - wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(add2(name="add2y").split("x", x=wf.lzin.y)) - wf.add( - fun_addvar3(name="addvar", a=wf.add2x.lzout.out, b=wf.add2y.lzout.out) - .split("c", c=wf.lzin.z) - .combine("add2x.x") - ) - wf.inputs.x = [2, 3] - wf.inputs.y = [11, 12] - wf.inputs.z = [10, 100] - wf.set_output([("out", wf.addvar.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Worky(x, y, z): + add2x = workflow.add(Add2().split("x", x=x), name="add2x") + add2y = workflow.add(Add2().split("x", x=y), name="add2y") + addvar = workflow.add( + FunAddVar3(a=add2x.out, b=add2y.out).split("c", c=z).combine("add2x.x") + ) + + return addvar.out + + worky = Worky(x=[2, 3], y=[11, 12], z=[10, 100]) - with Submitter(plugin=plugin) as sub: - sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() - assert len(results.output.out) == 4 - assert results.output.out[0] == [27, 28] - assert results.output.out[1] == [117, 118] - assert results.output.out[2] == [28, 
29] - assert results.output.out[3] == [118, 119] + assert len(outputs.out) == 4 + assert outputs.out[0] == [27, 28] + assert outputs.out[1] == [117, 118] + assert outputs.out[2] == [28, 29] + assert outputs.out[3] == [118, 119] # checking all directories - assert wf.output_dir.exists() -def test_wf_3nd_ndst_6(plugin, tmpdir): +def test_wf_3nd_ndst_6(plugin, tmp_path): """workflow with three tasks, third one connected to two previous tasks, the third one uses scalar splitter from the previous ones and a combiner """ - wf = Workflow(name="wf_ndst_9", input_spec=["x", "y"]) - wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(add2(name="add2y").split("x", x=wf.lzin.y)) - wf.add( - multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out) - .split(("_add2x", "_add2y")) - .combine("add2y.x") - ) - wf.inputs.x = [1, 2] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x, y): + add2x = workflow.add(Add2().split("x", x=x), name="add2x") + add2y = workflow.add(Add2().split("x", x=y), name="add2y") + mult = workflow.add( + Multiply(x=add2x.out, y=add2y.out) + .split(("_add2x", "_add2y")) + .combine("add2y.x") + ) + return mult.out + + worky = Worky(x=[1, 2], y=[11, 12]) + + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() - assert results.output.out == [39, 56] - # checking the output directory - assert wf.output_dir.exists() + assert outputs.out == [39, 56] -def test_wf_3nd_ndst_7(plugin, tmpdir): +def test_wf_3nd_ndst_7(plugin, tmp_path): """workflow with three tasks, third one connected to two previous tasks, the third one uses scalar splitter from the previous ones """ - wf = Workflow(name="wf_ndst_9", input_spec=["x"]) - wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(add2(name="add2y").split("x", x=wf.lzin.x)) - wf.add( - multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).split( - ("_add2x", "_add2y") + + @workflow.define + def Worky(x): + add2x = workflow.add(Add2().split("x", x=x), name="add2x") + add2y = workflow.add(Add2().split("x", x=x), name="add2y") + mult = workflow.add( + Multiply(x=add2x.out, y=add2y.out).split(("_add2x", "_add2y")) ) - ) - wf.inputs.x = [1, 2] - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir + return mult.out + + worky = Worky(x=[1, 2]) - with Submitter(plugin=plugin) as sub: - sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() - assert results.output.out == [9, 16] - # checking the output directory - assert wf.output_dir.exists() + assert outputs.out == [9, 16] # workflows with structures A -> B -> C with multiple connections -def test_wf_3nd_8(tmpdir): +def test_wf_3nd_8(tmp_path): """workflow with three tasks A->B->C vs two tasks A->C with multiple connections""" - wf = Workflow(name="wf", input_spec=["zip"], cache_dir=tmpdir) - wf.inputs.zip = [["test1", "test3", "test5"], ["test2", "test4", "test6"]] - wf.add(identity_2flds(name="iden2flds_1", x2="Hoi").split("x1", x1=wf.lzin.zip)) + @workflow.define(outputs=["out1", "out2", "out1a", "out2a"]) + def Worky(zip): + + iden2flds_1 = workflow.add( + Identity2Flds(x2="Hoi").split("x1", x1=zip), name="iden2flds_1" + ) - wf.add(identity(name="identity", x=wf.iden2flds_1.lzout.out1)) + identity = workflow.add(Identity(x=iden2flds_1.out1)) - wf.add( - identity_2flds( - name="iden2flds_2", x1=wf.identity.lzout.out, x2=wf.iden2flds_1.lzout.out2 + iden2flds_2 = workflow.add( + 
Identity2Flds(x1=identity.out, x2=iden2flds_1.out2), name="iden2flds_2" ) - ) - wf.add( - identity_2flds( - name="iden2flds_2a", - x1=wf.iden2flds_1.lzout.out1, - x2=wf.iden2flds_1.lzout.out2, + iden2flds_2a = workflow.add( + Identity2Flds( + x1=iden2flds_1.out1, + x2=iden2flds_1.out2, + ) ) - ) - wf.set_output( - [ - ("out1", wf.iden2flds_2.lzout.out1), - ("out2", wf.iden2flds_2.lzout.out2), - ("out1a", wf.iden2flds_2a.lzout.out1), - ("out2a", wf.iden2flds_2a.lzout.out2), - ] - ) + return iden2flds_2.out1, iden2flds_2.out2, iden2flds_2a.out1, iden2flds_2a.out2 - with Submitter(plugin="cf") as sub: - sub(wf) + worky = Worky(zip=[["test1", "test3", "test5"], ["test2", "test4", "test6"]]) - res = wf.result() + with Submitter(worker="cf") as sub: + res = sub(worky) assert ( - res.output.out1 - == res.output.out1a + res.outputs.out1 + == res.outputs.out1a == [["test1", "test3", "test5"], ["test2", "test4", "test6"]] ) - assert res.output.out2 == res.output.out2a == ["Hoi", "Hoi"] + assert res.outputs.out2 == res.outputs.out2a == ["Hoi", "Hoi"] # workflows with Left and Right part in splitters A -> B (L&R parts of the splitter) -def test_wf_ndstLR_1(plugin, tmpdir): +def test_wf_ndstLR_1(plugin, tmp_path): """Test workflow with 2 tasks, splitters on tasks levels The second task has its own simple splitter and the Left part from the first task should be added """ - wf = Workflow(name="wf_ndst_3", input_spec=["x", "y"]) - wf.add(add2(name="add2").split("x", x=wf.lzin.x)) - wf.add(multiply(name="mult", x=wf.add2.lzout.out).split("y", y=wf.lzin.y)) - wf.inputs.x = [1, 2] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x, y): + add2 = workflow.add(Add2().split("x", x=x), name="add2") + mult = workflow.add(Multiply(x=add2.out).split("y", y=y), name="mult") + return mult.out + + worky = Worky(x=[1, 2], y=[11, 12]) + + outputs = worky(worker=plugin, cache_dir=tmp_path) # checking if the splitter is created properly - assert wf.mult.state.splitter == ["_add2", "mult.y"] - assert wf.mult.state.splitter_rpn == ["add2.x", "mult.y", "*"] + wf = Workflow.construct(worky) + assert wf["mult"].state.splitter == ["_add2", "mult.y"] + assert wf["mult"].state.splitter_rpn == ["add2.x", "mult.y", "*"] - results = wf.result() # expected: [({"add2.x": 1, "mult.y": 11}, 33), ({"add2.x": 1, "mult.y": 12}, 36), # ({"add2.x": 2, "mult.y": 11}, 44), ({"add2.x": 2, "mult.y": 12}, 48)] - assert results.output.out == [33, 36, 44, 48] - # checking the output directory - assert wf.output_dir.exists() + assert outputs.out == [33, 36, 44, 48] -def test_wf_ndstLR_1a(plugin, tmpdir): +def test_wf_ndstLR_1a(plugin, tmp_path): """Test workflow with 2 tasks, splitters on tasks levels The second task has splitter that has Left part (from previous state) and the Right part (it's own splitter) """ - wf = Workflow(name="wf_ndst_3", input_spec=["x", "y"]) - wf.add(add2(name="add2").split("x", x=wf.lzin.x)) - wf.add( - multiply(name="mult").split(["_add2", "y"], x=wf.add2.lzout.out, y=wf.lzin.y) - ) - wf.inputs.x = [1, 2] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x, y): + add2 = workflow.add(Add2().split("x", x=x), name="add2") + mult = workflow.add( + Multiply().split(["_add2", "y"], x=add2.out, y=y), name="mult" + ) + return mult.out + + worky = Worky(x=[1, 2], y=[11, 12]) + + 
outputs = worky(worker=plugin, cache_dir=tmp_path) # checking if the splitter is created properly - assert wf.mult.state.splitter == ["_add2", "mult.y"] - assert wf.mult.state.splitter_rpn == ["add2.x", "mult.y", "*"] + wf = Workflow.construct(worky) + assert wf["mult"].state.splitter == ["_add2", "mult.y"] + assert wf["mult"].state.splitter_rpn == ["add2.x", "mult.y", "*"] - results = wf.result() # expected: [({"add2.x": 1, "mult.y": 11}, 33), ({"add2.x": 1, "mult.y": 12}, 36), # ({"add2.x": 2, "mult.y": 11}, 44), ({"add2.x": 2, "mult.y": 12}, 48)] - assert results.output.out == [33, 36, 44, 48] - # checking the output directory - assert wf.output_dir.exists() + assert outputs.out == [33, 36, 44, 48] -def test_wf_ndstLR_2(plugin, tmpdir): +def test_wf_ndstLR_2(plugin, tmp_path): """Test workflow with 2 tasks, splitters on tasks levels The second task has its own outer splitter and the Left part from the first task should be added """ - wf = Workflow(name="wf_ndst_3", input_spec=["x", "y", "z"]) - wf.add(add2(name="add2").split("x", x=wf.lzin.x)) - wf.add( - fun_addvar3(name="addvar", a=wf.add2.lzout.out).split( - ["b", "c"], b=wf.lzin.y, c=wf.lzin.z - ) - ) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = [10, 20] - wf.inputs.z = [100, 200] - wf.set_output([("out", wf.addvar.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x, y, z): + add2 = workflow.add(Add2().split("x", x=x), name="add2") + addvar = workflow.add(FunAddVar3(a=add2.out).split(["b", "c"], b=y, c=z)) + return addvar.out + + worky = Worky(x=[1, 2, 3], y=[10, 20], z=[100, 200]) + + outputs = worky(worker=plugin, cache_dir=tmp_path) # checking if the splitter is created properly - assert wf.addvar.state.splitter == ["_add2", ["addvar.b", "addvar.c"]] - assert wf.addvar.state.splitter_rpn == ["add2.x", "addvar.b", "addvar.c", "*", "*"] + wf = Workflow.construct(worky) + assert wf["addvar"].state.splitter == ["_add2", ["addvar.b", "addvar.c"]] + assert wf["addvar"].state.splitter_rpn == [ + "add2.x", + "addvar.b", + "addvar.c", + "*", + "*", + ] - results = wf.result() # expected: [({"add2.x": 1, "mult.b": 10, "mult.c": 100}, 113), # ({"add2.x": 1, "mult.b": 10, "mult.c": 200}, 213), # ({"add2.x": 1, "mult.b": 20, "mult.c": 100}, 123), # ({"add2.x": 1, "mult.b": 20, "mult.c": 200}, 223), # ...] 
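The expected values listed in the comment above (and asserted just below) can be re-derived with plain Python, independent of Pydra; the following sketch only illustrates how the outer splitter ["_add2", ["addvar.b", "addvar.c"]] enumerates states, with the upstream add2 output (x + 2) varied outermost and b, c taken as a Cartesian product:

# Illustration only (not part of the test suite): re-derives the expected
# values of test_wf_ndstLR_2 by hand.
expected = [
    (x + 2) + b + c      # add2 output, plus addvar's b and c
    for x in [1, 2, 3]   # add2.x states (outermost, from "_add2")
    for b in [10, 20]    # addvar.b
    for c in [100, 200]  # addvar.c
]
assert expected[:4] == [113, 213, 123, 223]
assert expected[-2:] == [125, 225]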
- assert results.output.out == [ + assert outputs.out == [ 113, 213, 123, @@ -1706,42 +1377,44 @@ def test_wf_ndstLR_2(plugin, tmpdir): 125, 225, ] - # checking the output directory - assert wf.output_dir.exists() -def test_wf_ndstLR_2a(plugin, tmpdir): +def test_wf_ndstLR_2a(plugin, tmp_path): """Test workflow with 2 tasks, splitters on tasks levels The second task has splitter that has Left part (from previous state) and the Right part (it's own outer splitter) """ - wf = Workflow(name="wf_ndst_3", input_spec=["x", "y", "z"]) - wf.add(add2(name="add2").split("x", x=wf.lzin.x)) - wf.add( - fun_addvar3(name="addvar", a=wf.add2.lzout.out).split( - ["_add2", ["b", "c"]], b=wf.lzin.y, c=wf.lzin.z + + @workflow.define + def Worky(x, y, z): + add2 = workflow.add(Add2().split("x", x=x), name="add2") + addvar = workflow.add( + FunAddVar3(a=add2.out).split(["_add2", ["b", "c"]], b=y, c=z) ) - ) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = [10, 20] - wf.inputs.z = [100, 200] - wf.set_output([("out", wf.addvar.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + return addvar.out + + worky = Worky(x=[1, 2, 3], y=[10, 20], z=[100, 200]) + + outputs = worky(worker=plugin, cache_dir=tmp_path) # checking if the splitter is created properly - assert wf.addvar.state.splitter == ["_add2", ["addvar.b", "addvar.c"]] - assert wf.addvar.state.splitter_rpn == ["add2.x", "addvar.b", "addvar.c", "*", "*"] + wf = Workflow.construct(worky) + assert wf["addvar"].state.splitter == ["_add2", ["addvar.b", "addvar.c"]] + assert wf["addvar"].state.splitter_rpn == [ + "add2.x", + "addvar.b", + "addvar.c", + "*", + "*", + ] - results = wf.result() # expected: [({"add2.x": 1, "mult.b": 10, "mult.c": 100}, 113), # ({"add2.x": 1, "mult.b": 10, "mult.c": 200}, 213), # ({"add2.x": 1, "mult.b": 20, "mult.c": 100}, 123), # ({"add2.x": 1, "mult.b": 20, "mult.c": 200}, 223), # ...] 
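The splitter_rpn assertions in these ndstLR tests follow a postfix convention: once "_add2" has been expanded to the upstream field "add2.x", a two-element list [A, B] denotes an outer product and is emitted as A, B, "*". A minimal sketch of that flattening follows (a hypothetical helper written for this note, not Pydra's implementation, and covering only the two-element outer-product case used here):

# Hypothetical helper, for illustration only: flattens a nested list splitter
# into the reverse-Polish form checked by the splitter_rpn assertions.
def to_rpn(splitter):
    if isinstance(splitter, str):
        return [splitter]
    left, right = splitter  # two-element list == outer product
    return to_rpn(left) + to_rpn(right) + ["*"]

assert to_rpn(["add2.x", ["addvar.b", "addvar.c"]]) == [
    "add2.x",
    "addvar.b",
    "addvar.c",
    "*",
    "*",
]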
- assert results.output.out == [ + assert outputs.out == [ 113, 213, 123, @@ -1755,147 +1428,131 @@ def test_wf_ndstLR_2a(plugin, tmpdir): 125, 225, ] - # checking the output directory - assert wf.output_dir.exists() # workflows with inner splitters A -> B (inner spl) -def test_wf_ndstinner_1(plugin, tmpdir): +def test_wf_ndstinner_1(plugin, tmp_path): """workflow with 2 tasks, the second task has inner splitter """ - wf = Workflow(name="wf_st_3", input_spec={"x": int}) - wf.add(list_output(name="list", x=wf.lzin.x)) - wf.add(add2(name="add2").split("x", x=wf.list.lzout.out)) - wf.inputs.x = 1 - wf.set_output([("out_list", wf.list.lzout.out), ("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define(outputs=["out_list", "out"]) + def Worky(x: int): + list = workflow.add(ListOutput(x=x)) + add2 = workflow.add(Add2().split("x", x=list.out), name="add2") + return list.out, add2.out + + worky = Worky(x=1) - assert wf.add2.state.splitter == "add2.x" - assert wf.add2.state.splitter_rpn == ["add2.x"] + wf = Workflow.construct(worky) + assert wf["add2"].state.splitter == "add2.x" + assert wf["add2"].state.splitter_rpn == ["add2.x"] - results = wf.result() - assert results.output.out_list == [1, 2, 3] - assert results.output.out == [3, 4, 5] + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert wf.output_dir.exists() + assert outputs.out_list == [1, 2, 3] + assert outputs.out == [3, 4, 5] -def test_wf_ndstinner_2(plugin, tmpdir): +def test_wf_ndstinner_2(plugin, tmp_path): """workflow with 2 tasks, the second task has two inputs and inner splitter from one of the input """ - wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wf.add(list_output(name="list", x=wf.lzin.x)) - wf.add(multiply(name="mult", y=wf.lzin.y).split("x", x=wf.list.lzout.out)) - wf.inputs.x = 1 - wf.inputs.y = 10 - wf.set_output([("out_list", wf.list.lzout.out), ("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define(outputs=["out_list", "out"]) + def Worky(x, y): + list = workflow.add(ListOutput(x=x)) + mult = workflow.add(Multiply(y=y).split("x", x=list.out), name="mult") + return list.out, mult.out + + worky = Worky(x=1, y=10) # - assert wf.mult.state.splitter == "mult.x" - assert wf.mult.state.splitter_rpn == ["mult.x"] + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() - assert results.output.out_list == [1, 2, 3] - assert results.output.out == [10, 20, 30] + wf = Workflow.construct(worky) + assert wf["mult"].state.splitter == "mult.x" + assert wf["mult"].state.splitter_rpn == ["mult.x"] - assert wf.output_dir.exists() + assert outputs.out_list == [1, 2, 3] + assert outputs.out == [10, 20, 30] -def test_wf_ndstinner_3(plugin, tmpdir): +def test_wf_ndstinner_3(plugin, tmp_path): """workflow with 2 tasks, the second task has two inputs and outer splitter that includes an inner field """ - wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wf.add(list_output(name="list", x=wf.lzin.x)) - wf.add(multiply(name="mult").split(["x", "y"], x=wf.list.lzout.out, y=wf.lzin.y)) - wf.inputs.x = 1 - wf.inputs.y = [10, 100] - wf.set_output([("out_list", wf.list.lzout.out), ("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define(outputs=["out_list", "out"]) + def Worky(x, y): + list = workflow.add(ListOutput(x=x)) + mult = workflow.add(Multiply().split(["x", "y"], x=list.out, y=y), name="mult") + return 
list.out, mult.out - assert wf.mult.state.splitter == ["mult.x", "mult.y"] - assert wf.mult.state.splitter_rpn == ["mult.x", "mult.y", "*"] + worky = Worky(x=1, y=[10, 100]) - results = wf.result() - assert results.output.out_list == [1, 2, 3] - assert results.output.out == [10, 100, 20, 200, 30, 300] + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert wf.output_dir.exists() + wf = Workflow.construct(worky) + assert wf["mult"].state.splitter == ["mult.x", "mult.y"] + assert wf["mult"].state.splitter_rpn == ["mult.x", "mult.y", "*"] + assert outputs.out_list == [1, 2, 3] + assert outputs.out == [10, 100, 20, 200, 30, 300] -def test_wf_ndstinner_4(plugin, tmpdir): + +def test_wf_ndstinner_4(plugin, tmp_path): """workflow with 3 tasks, the second task has two inputs and inner splitter from one of the input, the third task has no its own splitter """ - wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wf.add(list_output(name="list", x=wf.lzin.x)) - wf.add(multiply(name="mult", y=wf.lzin.y).split("x", x=wf.list.lzout.out)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) - wf.inputs.x = 1 - wf.inputs.y = 10 - wf.set_output([("out_list", wf.list.lzout.out), ("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define(outputs=["out_list", "out"]) + def Worky(x, y): + list = workflow.add(ListOutput(x=x)) + mult = workflow.add(Multiply(y=y).split("x", x=list.out), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") + return list.out, add2.out + + worky = Worky(x=1, y=10) - assert wf.mult.state.splitter == "mult.x" - assert wf.mult.state.splitter_rpn == ["mult.x"] - assert wf.add2.state.splitter == "_mult" - assert wf.add2.state.splitter_rpn == ["mult.x"] + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() - assert results.output.out_list == [1, 2, 3] - assert results.output.out == [12, 22, 32] + wf = Workflow.construct(worky) + assert wf["mult"].state.splitter == "mult.x" + assert wf["mult"].state.splitter_rpn == ["mult.x"] + assert wf["add2"].state.splitter == "_mult" + assert wf["add2"].state.splitter_rpn == ["mult.x"] - assert wf.output_dir.exists() + assert outputs.out_list == [1, 2, 3] + assert outputs.out == [12, 22, 32] -def test_wf_ndstinner_5(plugin, tmpdir): +def test_wf_ndstinner_5(plugin, tmp_path): """workflow with 3 tasks, the second task has two inputs and inner splitter from one of the input, (inner input come from the first task that has its own splitter, there is a inner_cont_dim) the third task has no new splitter """ - wf = Workflow(name="wf_5", input_spec=["x", "y", "b"]) - wf.add(list_output(name="list").split("x", x=wf.lzin.x)) - wf.add(multiply(name="mult").split(["y", "x"], x=wf.list.lzout.out, y=wf.lzin.y)) - wf.add(fun_addvar(name="addvar", a=wf.mult.lzout.out).split("b", b=wf.lzin.b)) - wf.inputs.x = [1, 2] - wf.inputs.y = [10, 100] - wf.inputs.b = [3, 5] - - wf.set_output( - [ - ("out_list", wf.list.lzout.out), - ("out_mult", wf.mult.lzout.out), - ("out_add", wf.addvar.lzout.out), - ] - ) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define(outputs=["out_list", "out_mult", "out_add"]) + def Worky(x, y, b): + list = workflow.add(ListOutput().split("x", x=x)) + mult = workflow.add(Multiply().split(["y", "x"], x=list.out, y=y), name="mult") + addvar = workflow.add(FunAddVar(a=mult.out).split("b", b=b)) + return list.out, mult.out, addvar.out + + worky = Worky(x=[1, 2], y=[10, 100], b=[3, 5]) + + outputs = 
worky(worker=plugin, cache_dir=tmp_path) - assert wf.mult.state.splitter == ["_list", ["mult.y", "mult.x"]] - assert wf.mult.state.splitter_rpn == ["list.x", "mult.y", "mult.x", "*", "*"] - assert wf.addvar.state.splitter == ["_mult", "addvar.b"] - assert wf.addvar.state.splitter_rpn == [ + wf = Workflow.construct(worky) + assert wf["mult"].state.splitter == ["_list", ["mult.y", "mult.x"]] + assert wf["mult"].state.splitter_rpn == ["list.x", "mult.y", "mult.x", "*", "*"] + assert wf["addvar"].state.splitter == ["_mult", "addvar.b"] + assert wf["addvar"].state.splitter_rpn == [ "list.x", "mult.y", "mult.x", @@ -1905,9 +1562,8 @@ def test_wf_ndstinner_5(plugin, tmpdir): "*", ] - results = wf.result() - assert results.output.out_list == [[1, 2, 3], [2, 4, 6]] - assert results.output.out_mult == [ + assert outputs.out_list == [[1, 2, 3], [2, 4, 6]] + assert outputs.out_mult == [ 10, 20, 30, @@ -1921,7 +1577,7 @@ def test_wf_ndstinner_5(plugin, tmpdir): 400, 600, ] - assert results.output.out_add == [ + assert outputs.out_add == [ 13, 15, 23, @@ -1948,642 +1604,608 @@ def test_wf_ndstinner_5(plugin, tmpdir): 605, ] - assert wf.output_dir.exists() - # workflow that have some single values as the input -def test_wf_st_singl_1(plugin, tmpdir): +def test_wf_st_singl_1(plugin, tmp_path): """workflow with two tasks, only one input is in the splitter and combiner""" - wf = Workflow(name="wf_st_5", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) - wf.split("x", x=[1, 2], y=11) - wf.combine("x") - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") - with Submitter(plugin=plugin) as sub: - sub(wf) + return add2.out - results = wf.result() - assert results[0].output.out == 13 - assert results[1].output.out == 24 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + worky = Worky().split("x", x=[1, 2], y=11).combine("x") + + outputs = worky(worker=plugin, cache_dir=tmp_path) + assert outputs.out[0] == 13 + assert outputs.out[1] == 24 -def test_wf_ndst_singl_1(plugin, tmpdir): + +def test_wf_ndst_singl_1(plugin, tmp_path): """workflow with two tasks, outer splitter and combiner on tasks level; only one input is part of the splitter, the other is a single value """ - wf = Workflow(name="wf_ndst_5", input_spec=["x", "y"]) - wf.add(multiply(name="mult", y=wf.lzin.y).split("x", x=wf.lzin.x)) - wf.add(add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) - wf.inputs.x = [1, 2] - wf.inputs.y = 11 - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply(y=y).split("x", x=x), name="mult") + add2 = workflow.add(Add2(x=mult.out).combine("mult.x"), name="add2") + return add2.out + + worky = Worky(x=[1, 2], y=11) + + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() - assert results.output.out == [13, 24] - # checking the output directory - assert wf.output_dir.exists() + assert outputs.out == [13, 24] -def test_wf_st_singl_2(plugin, tmpdir): +def test_wf_st_singl_2(plugin, tmp_path): """workflow with three tasks, third one connected to two previous tasks, splitter on the workflow level only one input is part of the splitter, the other is a single value """ - wf = 
Workflow(name="wf_st_6", input_spec=["x", "y"]) - wf.add(add2(name="add2x", x=wf.lzin.x)) - wf.add(add2(name="add2y", x=wf.lzin.y)) - wf.add(multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) - wf.split("x", x=[1, 2, 3], y=11) - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Worky(x, y): + add2x = workflow.add(Add2(x=x), name="add2x") + add2y = workflow.add(Add2(x=y), name="add2y") + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out), name="mult") + return mult.out - with Submitter(plugin=plugin) as sub: - sub(wf) + worky = Worky().split("x", x=[1, 2, 3], y=11) - results = wf.result() - assert len(results) == 3 - assert results[0].output.out == 39 - assert results[1].output.out == 52 - assert results[2].output.out == 65 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + outputs = worky(worker=plugin, cache_dir=tmp_path) + + assert outputs.out[0] == 39 + assert outputs.out[1] == 52 + assert outputs.out[2] == 65 -def test_wf_ndst_singl_2(plugin, tmpdir): +def test_wf_ndst_singl_2(plugin, tmp_path): """workflow with three tasks, third one connected to two previous tasks, splitter on the tasks levels only one input is part of the splitter, the other is a single value """ - wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) - wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(add2(name="add2y", x=wf.lzin.y)) - wf.add(multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = 11 - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x, y): + add2x = workflow.add(Add2().split("x", x=x), name="add2x") + add2y = workflow.add(Add2(x=y), name="add2y") + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out), name="mult") + return mult.out + + worky = Worky(x=[1, 2, 3], y=11) - results = wf.result() - assert len(results.output.out) == 3 - assert results.output.out == [39, 52, 65] - # checking the output directory - assert wf.output_dir.exists() + outputs = worky(worker=plugin, cache_dir=tmp_path) + assert len(outputs.out) == 3 + assert outputs.out == [39, 52, 65] -# workflows with structures wf(A) +# workflows with structures worky(A) -def test_wfasnd_1(plugin, tmpdir): + +def test_wfasnd_1(plugin, tmp_path): """workflow as a node workflow-node with one task and no splitter """ - wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wfnd.inputs.x = 2 - wf = Workflow(name="wf", input_spec=["x"]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out + + @workflow.define + def Worky(x): + wfnd = workflow.add(Wfnd(x)) + return wfnd.out - with Submitter(plugin=plugin) as sub: - sub(wf) + worky = Worky(x=2) - results = wf.result() - assert results.output.out == 4 - # checking the output directory - assert wf.output_dir.exists() + outputs = worky(worker=plugin, cache_dir=tmp_path) + assert outputs.out == 4 -def test_wfasnd_wfinp_1(plugin, tmpdir): + +def test_wfasnd_wfinp_1(plugin, tmp_path): """workflow as a node workflow-node with one task and no splitter input set for the main workflow """ - wf = Workflow(name="wf", input_spec=["x"]) - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.lzin.x) - 
wfnd.add(add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wf.add(wfnd) - wf.inputs.x = 2 - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out + + @workflow.define + def Worky(x): + wfnd = workflow.add(Wfnd(x=x)) + return wfnd.out + + worky = Worky(x=2) - checksum_before = wf.checksum - with Submitter(plugin=plugin) as sub: - sub(wf) + checksum_before = worky._hash + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert wf.checksum == checksum_before - results = wf.result() - assert results.output.out == 4 - # checking the output directory - assert wf.output_dir.exists() + wf = Workflow.construct(worky) + assert worky._hash == checksum_before + assert outputs.out == 4 -def test_wfasnd_wfndupdate(plugin, tmpdir): + +def test_wfasnd_wfndupdate(plugin, tmp_path): """workflow as a node workflow-node with one task and no splitter wfasnode input is updated to use the main workflow input """ - wfnd = Workflow(name="wfnd", input_spec=["x"], x=2) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out + + @workflow.define + def Worky(x): + wfnd = workflow.add(Wfnd(x)) + return wfnd.out - wf = Workflow(name="wf", input_spec=["x"], x=3) - wfnd.inputs.x = wf.lzin.x - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + worky = Worky(x=3) - with Submitter(plugin=plugin) as sub: - sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() - assert results.output.out == 5 - assert wf.output_dir.exists() + assert outputs.out == 5 -def test_wfasnd_wfndupdate_rerun(plugin, tmpdir): +def test_wfasnd_wfndupdate_rerun(plugin, tmp_path): """workflow as a node workflow-node with one task and no splitter wfasnode is run first and later is updated to use the main workflow input """ - wfnd = Workflow(name="wfnd", input_spec=["x"], x=2) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wfnd.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out + + wfnd = Wfnd(x=2) + + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: sub(wfnd) - wf = Workflow(name="wf", input_spec=["x"], x=3) - # trying to set before - wfnd.inputs.x = wf.lzin.x - wf.add(wfnd) - # trying to set after add... 
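The standalone run of Wfnd above already uses the context-manager form of Submitter; the caching tests further down in this diff also inspect the returned result object. For reference, that result-handling pattern looks roughly like this (a sketch assembled from calls that appear elsewhere in this diff, reusing the plugin, tmp_path and wfnd names from the surrounding test; not a definitive statement of the API):

# Sketch only: mirrors the result handling used by later tests in this diff.
with Submitter(worker=plugin, cache_dir=tmp_path) as sub:
    res = sub(wfnd)

assert not res.errored, "\n".join(res.errors["error message"])
assert res.outputs.out == 4  # Wfnd(x=2) wraps Add2, so 2 + 2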
- wf.wfnd.inputs.x = wf.lzin.x - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Worky(x): + wfnd = workflow.add(Wfnd(x)) + return wfnd.out + + worky = Worky(x=3) - with Submitter(plugin=plugin) as sub: - sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() - assert results.output.out == 5 - assert wf.output_dir.exists() + assert outputs.out == 5 # adding another layer of workflow - wf_o = Workflow(name="wf_o", input_spec=["x"], x=4) - wf.inputs.x = wf_o.lzin.x - wf_o.add(wf) - wf_o.set_output([("out", wf_o.wf.lzout.out)]) - wf_o.cache_dir = tmpdir + @workflow.define + def WorkyO(x): + worky = workflow.add(Worky(x=x)) + return worky.out + + wf_o = WorkyO(x=4) - with Submitter(plugin=plugin) as sub: - sub(wf_o) + outputs = wf_o(worker=plugin, cache_dir=tmp_path) - results = wf_o.result() - assert results.output.out == 6 - assert wf_o.output_dir.exists() + assert outputs.out == 6 -def test_wfasnd_st_1(plugin, tmpdir): +def test_wfasnd_st_1(plugin, tmp_path): """workflow as a node workflow-node with one task, splitter for wfnd """ - wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wfnd.split("x", x=[2, 4]) - wf = Workflow(name="wf", input_spec=["x"]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out + + @workflow.define + def Worky(x): + wfnd = workflow.add(Wfnd(x=x).split(x=x)) + return wfnd.out + + worky = Worky(x=[2, 4]) - checksum_before = wf.checksum - with Submitter(plugin=plugin) as sub: - sub(wf) + checksum_before = worky._hash + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert wf.checksum == checksum_before - results = wf.result() - assert results.output.out == [4, 6] - # checking the output directory - assert wf.output_dir.exists() + wf = Workflow.construct(worky) + assert worky._hash == checksum_before + assert outputs.out == [4, 6] -def test_wfasnd_st_updatespl_1(plugin, tmpdir): + +def test_wfasnd_st_updatespl_1(plugin, tmp_path): """workflow as a node workflow-node with one task, splitter for wfnd is set after add """ - wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wf = Workflow(name="wf", input_spec=["x"]) - wf.add(wfnd) - wfnd.split("x", x=[2, 4]) - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out + + @workflow.define + def Worky(x): + wfnd = workflow.add(Wfnd(x=x).split(x=x)) + return wfnd.out + + worky = Worky(x=[2, 4]) - with Submitter(plugin=plugin) as sub: - sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() - assert results.output.out == [4, 6] - # checking the output directory - assert wf.output_dir.exists() + assert outputs.out == [4, 6] -def test_wfasnd_ndst_1(plugin, tmpdir): +def test_wfasnd_ndst_1(plugin, tmp_path): """workflow as a node workflow-node with one task, splitter for node """ - wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(add2(name="add2").split("x", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - # TODO: without this the test is failing - wfnd.plugin = plugin - wfnd.inputs.x = [2, 4] - wf = Workflow(name="wf", input_spec=["x"]) - wf.add(wfnd) - 
wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2().split("x", x=x), name="add2") + return add2.out + + @workflow.define + def Worky(x): + wfnd = workflow.add(Wfnd(x=x)) + return wfnd.out + + worky = Worky(x=[2, 4]) - with Submitter(plugin=plugin) as sub: - sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() - assert results.output.out == [4, 6] - # checking the output directory - assert wf.output_dir.exists() + assert outputs.out == [4, 6] -def test_wfasnd_ndst_updatespl_1(plugin, tmpdir): +def test_wfasnd_ndst_updatespl_1(plugin, tmp_path): """workflow as a node workflow-node with one task, splitter for node added after add """ - wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) - wfnd.add2.split("x", x=[2, 4]) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wf = Workflow(name="wf", input_spec=["x"]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2().split("x", x=x), name="add2") + return add2.out - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x): + wfnd = workflow.add(Wfnd(x=x)) + return wfnd.out - results = wf.result() - assert results.output.out == [4, 6] - # checking the output directory - assert wf.output_dir.exists() + worky = Worky(x=[2, 4]) + outputs = worky(worker=plugin, cache_dir=tmp_path) -def test_wfasnd_wfst_1(plugin, tmpdir): + assert outputs.out == [4, 6] + + +def test_wfasnd_wfst_1(plugin, tmp_path): """workflow as a node workflow-node with one task, splitter for the main workflow """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.lzin.x) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - - wf.add(wfnd) - wf.split("x", x=[2, 4]) - wf.set_output([("out", wf.wfnd.lzout.out)]) - - with Submitter(plugin=plugin) as sub: - sub(wf) - # assert wf.output_dir.exists() - results = wf.result() - assert results[0].output.out == 4 - assert results[1].output.out == 6 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out + + @workflow.define + def Worky(x): + wfnd = workflow.add(Wfnd(x=x)) + return wfnd.out + + worky = Worky().split("x", x=[2, 4]) -# workflows with structures wf(A) -> B + outputs = worky(worker=plugin, cache_dir=tmp_path) + # assert wf["output_dir"].exists() -def test_wfasnd_st_2(plugin, tmpdir): + assert outputs.out[0] == 4 + assert outputs.out[1] == 6 + + +# workflows with structures worky(A) -> B + + +def test_wfasnd_st_2(plugin, tmp_path): """workflow as a node, the main workflow has two tasks, splitter for wfnd """ - wfnd = Workflow(name="wfnd", input_spec=["x", "y"]) - wfnd.add(multiply(name="mult", x=wfnd.lzin.x, y=wfnd.lzin.y)) - wfnd.set_output([("out", wfnd.mult.lzout.out)]) - wfnd.split(("x", "y"), x=[2, 4], y=[1, 10]) - wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wf.add(wfnd) - wf.add(add2(name="add2", x=wf.wfnd.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x, y): + mult = workflow.add(Multiply().split(("x", "y"), x=x, y=y), name="mult") + return mult.out + + @workflow.define + def Worky(x, y): + wfnd = workflow.add(Wfnd(x=x, y=y)) + add2 = 
workflow.add(Add2(x=wfnd.out), name="add2") + return add2.out + + worky = Worky(x=[2, 4], y=[1, 10]) - with Submitter(plugin=plugin) as sub: - sub(wf) - # assert wf.output_dir.exists() - results = wf.result() - assert results.output.out == [4, 42] - # checking the output directory - assert wf.output_dir.exists() + outputs = worky(worker=plugin, cache_dir=tmp_path) + # assert wf["output_dir"].exists() -def test_wfasnd_wfst_2(plugin, tmpdir): + assert outputs.out == [4, 42] + + +def test_wfasnd_wfst_2(plugin, tmp_path): """workflow as a node, the main workflow has two tasks, splitter for the main workflow """ - wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wfnd = Workflow(name="wfnd", input_spec=["x", "y"], x=wf.lzin.x, y=wf.lzin.y) - wfnd.add(multiply(name="mult", x=wfnd.lzin.x, y=wfnd.lzin.y)) - wfnd.set_output([("out", wfnd.mult.lzout.out)]) - - wf.add(wfnd) - wf.add(add2(name="add2", x=wf.wfnd.lzout.out)) - wf.split(("x", "y"), x=[2, 4], y=[1, 10]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - - with Submitter(plugin=plugin) as sub: - sub(wf) - # assert wf.output_dir.exists() - results = wf.result() - assert results[0].output.out == 4 - assert results[1].output.out == 42 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + @workflow.define + def Wfnd(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + return mult.out + + @workflow.define + def Worky(x, y): + wfnd = workflow.add(Wfnd(x=x, y=y)) + add2 = workflow.add(Add2(x=wfnd.out), name="add2") + return add2.out + + worky = Worky().split(("x", "y"), x=[2, 4], y=[1, 10]) -# workflows with structures A -> wf(B) + outputs = worky(worker=plugin, cache_dir=tmp_path) + # assert wf["output_dir"].exists() -def test_wfasnd_ndst_3(plugin, tmpdir): + assert outputs.out[0] == 4 + assert outputs.out[1] == 42 + + +# workflows with structures A -> worky(B) + + +def test_wfasnd_ndst_3(plugin, tmp_path): """workflow as the second node, the main workflow has two tasks, splitter for the first task """ - wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wf.add(multiply(name="mult").split(("x", "y"), x=wf.lzin.x, y=wf.lzin.y)) - wf.inputs.x = [2, 4] - wf.inputs.y = [1, 10] - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.mult.lzout.out) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wf.add(wfnd) + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out + + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply().split(("x", "y"), x=x, y=y), name="mult") + wfnd = workflow.add(Wfnd(mult.out)) + return wfnd.out + + worky = Worky(x=[2, 4], y=[1, 10]) - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + outputs = worky(cache_dir=tmp_path, plugin=plugin) - with Submitter(plugin="serial") as sub: - sub(wf) - # assert wf.output_dir.exists() - results = wf.result() - assert results.output.out == [4, 42] - # checking the output directory - assert wf.output_dir.exists() + # assert wf["output_dir"].exists() + assert outputs.out == [4, 42] -def test_wfasnd_wfst_3(plugin, tmpdir): + +def test_wfasnd_wfst_3(plugin, tmp_path): """workflow as the second node, the main workflow has two tasks, splitter for the main workflow """ - wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.split(("x", "y"), x=[2, 4], y=[1, 10]) - - wfnd = Workflow(name="wfnd", input_spec=["x"], 
x=wf.mult.lzout.out) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wf.add(wfnd) - - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.plugin = plugin - wf.cache_dir = tmpdir - - with Submitter(plugin=plugin) as sub: - sub(wf) - # assert wf.output_dir.exists() - results = wf.result() - assert results[0].output.out == 4 - assert results[1].output.out == 42 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out + + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + + wfnd = workflow.add(Wfnd(mult.out)) + + return wfnd.out + + worky = Worky().split(("x", "y"), x=[2, 4], y=[1, 10]) + + outputs = worky(worker=plugin, cache_dir=tmp_path) + + # assert wf["output_dir"].exists() + + assert outputs.out[0] == 4 + assert outputs.out[1] == 42 # workflows with structures wfns(A->B) -def test_wfasnd_4(plugin, tmpdir): +def test_wfasnd_4(plugin, tmp_path): """workflow as a node workflow-node with two tasks and no splitter """ - wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(add2(name="add2_1st", x=wfnd.lzin.x)) - wfnd.add(add2(name="add2_2nd", x=wfnd.add2_1st.lzout.out)) - wfnd.set_output([("out", wfnd.add2_2nd.lzout.out)]) - wfnd.inputs.x = 2 - wf = Workflow(name="wf", input_spec=["x"]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x): + add2_1st = workflow.add(Add2(x=x), name="add2_1st") + add2_2nd = workflow.add(Add2(x=add2_1st.out), name="add2_2nd") + return add2_2nd.out + + @workflow.define + def Worky(x): + wfnd = workflow.add(Wfnd(x=2)) + return wfnd.out - with Submitter(plugin=plugin) as sub: - sub(wf) + worky = Worky(x=2) - results = wf.result() - assert results.output.out == 6 - # checking the output directory - assert wf.output_dir.exists() + outputs = worky(worker=plugin, cache_dir=tmp_path) + assert outputs.out == 6 -def test_wfasnd_ndst_4(plugin, tmpdir): + +def test_wfasnd_ndst_4(plugin, tmp_path): """workflow as a node workflow-node with two tasks, splitter for node """ - wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(add2(name="add2_1st").split("x", x=wfnd.lzin.x)) - wfnd.add(add2(name="add2_2nd", x=wfnd.add2_1st.lzout.out)) - wfnd.set_output([("out", wfnd.add2_2nd.lzout.out)]) - wfnd.inputs.x = [2, 4] - wf = Workflow(name="wf", input_spec=["x"]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x): + add2_1st = workflow.add(Add2().split(x=x), name="add2_1st") + add2_2nd = workflow.add(Add2(x=add2_1st.out), name="add2_2nd") + return add2_2nd.out + + @workflow.define + def Worky(x): + wfnd = workflow.add(Wfnd(x=x)) + return wfnd.out + + worky = Worky(x=[2, 4]) - with Submitter(plugin=plugin) as sub: - sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() - assert results.output.out == [6, 8] - # checking the output directory - assert wf.output_dir.exists() + assert outputs.out == [6, 8] -def test_wfasnd_wfst_4(plugin, tmpdir): +def test_wfasnd_wfst_4(plugin, tmp_path): """workflow as a node workflow-node with two tasks, splitter for the main workflow """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.lzin.x) - wfnd.add(add2(name="add2_1st", x=wfnd.lzin.x)) - wfnd.add(add2(name="add2_2nd", 
x=wfnd.add2_1st.lzout.out)) - wfnd.set_output([("out", wfnd.add2_2nd.lzout.out)]) - - wf.add(wfnd) - wf.split("x", x=[2, 4]) - wf.set_output([("out", wf.wfnd.lzout.out)]) - - with Submitter(plugin=plugin) as sub: - sub(wf) - # assert wf.output_dir.exists() - results = wf.result() - assert results[0].output.out == 6 - assert results[1].output.out == 8 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + + @workflow.define + def Wfnd(x): + add2_1st = workflow.add(Add2(x=x), name="add2_1st") + add2_2nd = workflow.add(Add2(x=add2_1st.out), name="add2_2nd") + return add2_2nd.out + + @workflow.define + def Worky(x): + wfnd = workflow.add(Wfnd(x=x)) + return wfnd.out + + worky = Worky().split("x", x=[2, 4]) + + outputs = worky(worker=plugin, cache_dir=tmp_path) + + # assert wf["output_dir"].exists() + + assert outputs.out[0] == 6 + assert outputs.out[1] == 8 # Testing caching @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachedir(plugin, tmpdir): - """wf with provided cache_dir using pytest tmpdir""" - cache_dir = tmpdir.mkdir("test_wf_cache_1") +def test_wf_nostate_cachedir(plugin, tmp_path): + """worky with provided cache_dir using pytest tmp_path""" + cache_dir = tmp_path.mkdir("test_wf_cache_1") - wf = Workflow(name="wf_2", input_spec=["x", "y"], cache_dir=cache_dir) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.inputs.y = 3 + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") + return add2.out - with Submitter(plugin=plugin) as sub: - sub(wf) + worky = Worky(x=2, y=3) - assert wf.output_dir.exists() - results = wf.result() - assert 8 == results.output.out + outputs = worky(worker=plugin, cache_dir=tmp_path) + + assert 8 == outputs.out shutil.rmtree(cache_dir) @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachedir_relativepath(tmpdir, plugin): - """wf with provided cache_dir as relative path""" - tmpdir.chdir() +def test_wf_nostate_cachedir_relativepath(tmp_path, plugin): + """worky with provided cache_dir as relative path""" + tmp_path.chdir() cache_dir = "test_wf_cache_2" - tmpdir.mkdir(cache_dir) + tmp_path.mkdir(cache_dir) + + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") + return add2.out - wf = Workflow(name="wf_2", input_spec=["x", "y"], cache_dir=cache_dir) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.inputs.y = 3 + worky = Worky(x=2, y=3) - with Submitter(plugin=plugin) as sub: - sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert wf.output_dir.exists() - results = wf.result() - assert 8 == results.output.out + assert 8 == outputs.out shutil.rmtree(cache_dir) @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations(plugin, tmpdir): +def test_wf_nostate_cachelocations(plugin, tmp_path): """ Two identical wfs with provided cache_dir; - the second wf has cache_locations and should not recompute the results + the second worky has cache_locations and should not recompute the results """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = 
tmp_path.mkdir("test_wf_cache4") + + @workflow.define + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 + worky1 = Worky1(x=2, y=3) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 + @workflow.define + def Worky2(x, y): + + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky2 = Worky2(x=2, y=3) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(worky2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 - results2 = wf2.result() - assert 8 == results2.output.out + assert 8 == results2.outputs.out # checking execution time (for unix and cf) # for win and dask/slurm the time for dir creation etc. 
might take much longer @@ -2591,119 +2213,114 @@ def test_wf_nostate_cachelocations(plugin, tmpdir): assert t1 > 2 assert t2 < max(1, t1 - 1) - # checking if the second wf didn't run again - assert wf1.output_dir.exists() - assert not wf2.output_dir.exists() + # checking if the second worky didn't run again @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations_a(plugin, tmpdir): +def test_wf_nostate_cachelocations_a(plugin, tmp_path): """ the same as previous test, but workflows names differ; the task should not be run and it should be fast, - but the wf itself is triggered and the new output dir is created + but the worky itself is triggered and the new output dir is created """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf1", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + @workflow.define + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky1 = Worky1(x=2, y=3) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out - wf2 = Workflow( - name="wf2", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 - wf2.plugin = plugin + @workflow.define + def Worky2(x, y): + + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky2 = Worky2(x=2, y=3) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(worky2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 - results2 = wf2.result() - assert 8 == results2.output.out + assert 8 == results2.outputs.out # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": # checking execution time (second one should be quick) assert t1 > 2 - # testing relative values (windows or slurm takes much longer to create wf itself) + # testing relative values (windows or slurm takes much longer to create worky itself) assert t2 < max(1, t1 - 1) - # checking if both wf.output_dir are created - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + # checking if both wf["output_dir"] are created + assert results1.output_dir != results2.output_dir @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations_b(plugin, tmpdir): +def test_wf_nostate_cachelocations_b(plugin, tmp_path): """ the same as previous test, but the 2nd workflows has two outputs (connected to the same task output); the task should not be run and it should be fast, - but the wf itself is triggered and the new output dir is created + but the worky itself is triggered and the new output dir is created """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + @workflow.define + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky1 = Worky1(x=2, y=3) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - # additional output - wf2.set_output([("out_pr", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 - wf2.plugin = plugin + @workflow.define("out_pr") + def Worky2(x, y): + + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky2 = Worky2(x=2, y=3) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(worky2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 - results2 = wf2.result() - assert 8 == results2.output.out == results2.output.out_pr + assert 8 == results2.outputs.out == results2.outputs.out_pr # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -2711,173 +2328,166 @@ def test_wf_nostate_cachelocations_b(plugin, tmpdir): assert t1 > 2 assert t2 < max(1, t1 - 1) - # checking if the second wf didn't run again - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + # checking if the second worky didn't run again + assert results1.output_dir != results2.output_dir @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations_setoutputchange(plugin, tmpdir): +def test_wf_nostate_cachelocations_setoutputchange(plugin, tmp_path): """ - the same as previous test, but wf output names differ, + the same as previous test, but worky output names differ, the tasks should not be run and it should be fast, - but the wf itself is triggered and the new output dir is created - (the second wf has updated name in its Output) + but the worky itself is triggered and the new output dir is created + (the second worky has updated name in its Output) """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") + + @workflow.define(outputs=["out1"]) + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out # out1 - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out1", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + worky1 = Worky1(x=2, y=3) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out1 + assert 8 == results1.outputs.out1 - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out2", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 - wf2.plugin = plugin + @workflow.define(outputs=["out2"]) + def Worky2(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out # out2 + + worky2 = Worky2(x=2, y=3) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(worky2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 - results2 = wf2.result() - assert 8 == results2.output.out2 + assert 8 == results2.outputs.out2 # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": - # checking execution time (the second wf should be fast, nodes do not have to rerun) + # checking execution time (the second worky should be fast, nodes do not have to rerun) assert t1 > 2 - # testing relative values (windows or slurm takes much longer to create wf itself) + # testing relative values (windows or slurm takes much longer to create worky itself) assert t2 < max(1, t1 - 1) - # both wf output_dirs should be created - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + # both worky output_dirs should be created + assert results1.output_dir != results2.output_dir @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations_setoutputchange_a(plugin, tmpdir): +def test_wf_nostate_cachelocations_setoutputchange_a(plugin, tmp_path): """ - the same as previous test, but wf names and output names differ, + the same as previous test, but worky names and output names differ, """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") + + @workflow.define(outputs=["out1"]) + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out # out1 - wf1 = Workflow(name="wf1", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out1", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + worky1 = Worky1(x=2, y=3) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out1 + assert 8 == results1.outputs.out1 - wf2 = Workflow( - name="wf2", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out2", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 - wf2.plugin = plugin + @workflow.define(outputs=["out2"]) + def Worky2(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky2 = Worky2(x=2, y=3) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(worky2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 - results2 = wf2.result() - assert 8 == results2.output.out2 + assert 8 == results2.outputs.out2 # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": assert t1 > 2 - # testing relative values (windows or slurm takes much longer to create wf itself) + # testing relative values (windows or slurm takes much longer to create worky itself) assert t2 < max(1, t1 - 1) - # both wf output_dirs should be created - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + # both worky output_dirs should be created + assert results1.output_dir != results2.output_dir @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations_forcererun(plugin, tmpdir): +def test_wf_nostate_cachelocations_forcererun(plugin, tmp_path): """ Two identical wfs with provided cache_dir; - the second wf has cache_locations, + the second worky has cache_locations, but submitter is called with rerun=True, so should recompute """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") + + @workflow.define + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + worky1 = Worky1(x=2, y=3) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 - wf2.plugin = plugin + @workflow.define + def Worky2(x, y): + + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky2 = Worky2(x=2, y=3) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf2, rerun=True) + with Submitter(worker=plugin, cache_dir=cache_dir2) as sub: + results2 = sub(worky2, rerun=True) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 - results2 = wf2.result() - assert 8 == results2.output.out + assert 8 == results2.outputs.out # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -2885,62 +2495,58 @@ def test_wf_nostate_cachelocations_forcererun(plugin, tmpdir): assert t1 > 2 assert t2 > 2 - # checking if the second wf didn't run again - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + # checking if the second worky didn't run again + assert results1.output_dir != results2.output_dir @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations_wftaskrerun_propagateTrue(plugin, tmpdir): +def test_wf_nostate_cachelocations_wftaskrerun_propagateTrue(plugin, tmp_path): """ Two identical wfs with provided cache_dir and cache_locations for the second one; - submitter doesn't have rerun, but the second wf has rerun=True, + submitter doesn't have rerun, but the second worky has rerun=True, propagate_rerun is True as default, so everything should be rerun """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") + + @workflow.define + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + worky1 = Worky1(x=2, y=3) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - rerun=True, # wh has to be rerun (default for propagate_rerun is True) - ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 - wf2.plugin = plugin + @workflow.define + def Worky2(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky2 = Worky2(x=2, y=3) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(worky2, rerun=True) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 - results2 = wf2.result() - assert 8 == results2.output.out + assert 8 == results2.outputs.out - # checking if the second wf runs again - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + # checking if the second worky runs again + assert results1.output_dir != results2.output_dir # everything has to be recomputed assert len(list(Path(cache_dir1).glob("F*"))) == 2 @@ -2953,58 +2559,54 @@ def test_wf_nostate_cachelocations_wftaskrerun_propagateTrue(plugin, tmpdir): @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations_wftaskrerun_propagateFalse(plugin, tmpdir): +def test_wf_nostate_cachelocations_wftaskrerun_propagateFalse(plugin, tmp_path): """ Two identical wfs with provided 
cache_dir and cache_locations for the second one; - submitter doesn't have rerun, but the second wf has rerun=True, - propagate_rerun is set to False, so wf will be triggered, + submitter doesn't have rerun, but the second worky has rerun=True, + propagate_rerun is set to False, so worky will be triggered, but tasks will not have rerun, so will use the previous results """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") + + @workflow.define + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + worky1 = Worky1(x=2, y=3) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - rerun=True, # wh has to be rerun - propagate_rerun=False, # but rerun doesn't propagate to the tasks - ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 - wf2.plugin = plugin + @workflow.define + def Worky2(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky2 = Worky2(x=2, y=3) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(worky2, rerun=True, propagate_rerun=False) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 - results2 = wf2.result() - assert 8 == results2.output.out + assert 8 == results2.outputs.out - # checking if the second wf runs again - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + # checking if the second worky runs again + assert results1.output_dir != results2.output_dir # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -3018,57 +2620,53 @@ def test_wf_nostate_cachelocations_wftaskrerun_propagateFalse(plugin, tmpdir): @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations_taskrerun_wfrerun_propagateFalse(plugin, tmpdir): +def test_wf_nostate_cachelocations_taskrerun_wfrerun_propagateFalse(plugin, tmp_path): """ - Two identical wfs with provided cache_dir, and cache_locations for the second wf; - submitter doesn't have rerun, but wf has rerun=True, + Two identical wfs with provided cache_dir, and cache_locations for the second worky; + submitter doesn't have rerun, but worky has rerun=True, since propagate_rerun=False, only tasks that have rerun=True will be rerun """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") + + @workflow.define + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + worky1 = Worky1(x=2, y=3) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - rerun=True, - propagate_rerun=False, # rerun will not be propagated to each task - ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - # rerun on the task level needed (wf.propagate_rerun is False) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out, rerun=True)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 - wf2.plugin = plugin + @workflow.define + def Worky2(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + # rerun on the task level needed (wf["propagate_rerun"] is False) + add2 = workflow.add(Add2Wait(x=mult.out, rerun=True), name="add2") + return add2.out + + worky2 = Worky2(x=2, y=3) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub( + worky2, rerun=True, propagate_rerun=False + ) # rerun will not be propagated to each task) t2 = time.time() - t0 - results2 = wf2.result() - assert 8 == results2.output.out + assert 8 == results2.outputs.out - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + assert results1.output_dir != results2.output_dir # the second task should be recomputed assert len(list(Path(cache_dir1).glob("F*"))) == 2 assert len(list(Path(cache_dir2).glob("F*"))) == 1 @@ -3081,145 +2679,152 @@ def test_wf_nostate_cachelocations_taskrerun_wfrerun_propagateFalse(plugin, tmpd @pytest.mark.flaky(reruns=3) -def test_wf_nostate_nodecachelocations(plugin, tmpdir): +def test_wf_nostate_nodecachelocations(plugin, tmp_path): """ Two wfs with different input, but the second node has the same input; - the 
second wf has cache_locations and should recompute the wf, + the second worky has cache_locations and should recompute the worky, but without recomputing the second node """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x"], cache_dir=cache_dir1) - wf1.add(ten(name="ten", x=wf1.lzin.x)) - wf1.add(add2(name="add2", x=wf1.ten.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 3 - wf1.plugin = plugin + @workflow.define + def Worky1(x): + ten = workflow.add(Ten(x=x)) + add2 = workflow.add(Add2(x=ten.out), name="add2") + return add2.out - with Submitter(plugin=plugin) as sub: - sub(wf1) + worky1 = Worky1(x=3) - results1 = wf1.result() - assert 12 == results1.output.out + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(ten(name="ten", x=wf2.lzin.x)) - wf2.add(add2(name="add2", x=wf2.ten.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.plugin = plugin - - with Submitter(plugin=plugin) as sub: - sub(wf2) - - results2 = wf2.result() - assert 12 == results2.output.out - - # checking if the second wf runs again, but runs only one task - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() - # the second wf should rerun one task + assert not results1.errored, "\n".join(results1.errors["error message"]) + + assert 12 == results1.outputs.out + + @workflow.define + def Worky2(x, y): + + ten = workflow.add(Ten(x=x)) + add2 = workflow.add(Add2(x=ten.out), name="add2") + return add2.out + + worky2 = Worky2(x=2) + + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(worky2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) + + assert 12 == results2.outputs.out + + # checking if the second worky runs again, but runs only one task + assert results1.output_dir != results2.output_dir + # the second worky should rerun one task assert len(list(Path(cache_dir1).glob("F*"))) == 2 assert len(list(Path(cache_dir2).glob("F*"))) == 1 @pytest.mark.flaky(reruns=3) -def test_wf_nostate_nodecachelocations_upd(plugin, tmpdir): +def test_wf_nostate_nodecachelocations_upd(plugin, tmp_path): """ Two wfs with different input, but the second node has the same input; - the second wf has cache_locations (set after adding tasks) and should recompute, + the second worky has cache_locations (set after adding tasks) and should recompute, but without recomputing the second node """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") + + @workflow.define + def Worky1(x): + ten = workflow.add(Ten(x=x)) + add2 = workflow.add(Add2(x=ten.out), name="add2") + return add2.out + + worky1 = Worky1(x=3) + + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) - wf1 = Workflow(name="wf", input_spec=["x"], cache_dir=cache_dir1) - wf1.add(ten(name="ten", x=wf1.lzin.x)) - wf1.add(add2(name="add2", x=wf1.ten.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 3 - wf1.plugin = plugin + assert 
12 == results1.outputs.out - with Submitter(plugin=plugin) as sub: - sub(wf1) + @workflow.define + def Worky2(x, y): + ten = workflow.add(Ten(x=x)) + add2 = workflow.add(Add2(x=ten.out), name="add2") + return add2.out - results1 = wf1.result() - assert 12 == results1.output.out + worky2 = Worky2(x=2) - wf2 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir2) - wf2.add(ten(name="ten", x=wf2.lzin.x)) - wf2.add(add2(name="add2", x=wf2.ten.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.plugin = plugin # updating cache_locations after adding the tasks - wf2.cache_locations = cache_dir1 + worky2.cache_locations = cache_dir1 - with Submitter(plugin=plugin) as sub: - sub(wf2) + with Submitter(worker=plugin, cache_dir=cache_dir2) as sub: + results2 = sub(worky2) - results2 = wf2.result() - assert 12 == results2.output.out + assert not results2.errored, "\n".join(results2.errors["error message"]) - # checking if the second wf runs again, but runs only one task - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() - # the second wf should have only one task run + assert 12 == results2.outputs.out + + # checking if the second worky runs again, but runs only one task + assert results1.output_dir != results2.output_dir + # the second worky should have only one task run assert len(list(Path(cache_dir1).glob("F*"))) == 2 assert len(list(Path(cache_dir2).glob("F*"))) == 1 @pytest.mark.flaky(reruns=3) -def test_wf_state_cachelocations(plugin, tmpdir): +def test_wf_state_cachelocations(plugin, tmp_path): """ Two identical wfs (with states) with provided cache_dir; - the second wf has cache_locations and should not recompute the results + the second worky has cache_locations and should not recompute the results """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") + + @workflow.define + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) - wf1.plugin = plugin + worky1 = Worky1().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 - results1 = wf1.result() - assert results1[0].output.out == 8 - assert results1[1].output.out == 82 + assert results1.outputs.out[0] == 8 + assert results1.outputs.out[1] == 82 - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) - wf2.plugin = plugin + @workflow.define + def Worky2(x, y): + + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky2 = Worky2().split(splitter=("x", "y"), x=[2, 20], y=[3, 
4]) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(worky2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 - results2 = wf2.result() - assert results2[0].output.out == 8 - assert results2[1].output.out == 82 + assert results2.outputs.out[0] == 8 + assert results2.outputs.out[1] == 82 # for win and dask/slurm the time for dir creation etc. might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -3228,62 +2833,60 @@ def test_wf_state_cachelocations(plugin, tmpdir): assert t2 < max(1, t1 - 1) # checking all directories - assert wf1.output_dir - for odir in wf1.output_dir: - assert odir.exists() - # checking if the second wf didn't run again + + # checking if the second worky didn't run again # checking all directories - assert wf2.output_dir - for odir in wf2.output_dir: + + for odir in worky2.output_dir: assert not odir.exists() @pytest.mark.flaky(reruns=3) -def test_wf_state_cachelocations_forcererun(plugin, tmpdir): +def test_wf_state_cachelocations_forcererun(plugin, tmp_path): """ Two identical wfs (with states) with provided cache_dir; - the second wf has cache_locations, + the second worky has cache_locations, but submitter is called with rerun=True, so should recompute """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") + + @workflow.define + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) - wf1.plugin = plugin + worky1 = Worky1().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 - results1 = wf1.result() - assert results1[0].output.out == 8 - assert results1[1].output.out == 82 + assert results1.outputs.out[0] == 8 + assert results1.outputs.out[1] == 82 - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) - wf2.plugin = plugin + @workflow.define + def Worky2(x, y): + + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky2 = Worky2().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf2, rerun=True) + with Submitter(worker=plugin, cache_dir=cache_dir2) as sub: + results2 = sub(worky2, rerun=True) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 - results2 = wf2.result() - assert results2[0].output.out == 8 - assert results2[1].output.out == 
82 + assert results2.outputs.out[0] == 8 + assert results2.outputs.out[1] == 82 # for win and dask/slurm the time for dir creation etc. might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -3292,64 +2895,59 @@ def test_wf_state_cachelocations_forcererun(plugin, tmpdir): assert t2 > 2 # checking all directories - assert wf1.output_dir - for odir in wf1.output_dir: - assert odir.exists() - # checking if the second wf run again + + # checking if the second worky run again # checking all directories - assert wf2.output_dir - for odir in wf2.output_dir: - assert odir.exists() @pytest.mark.flaky(reruns=3) -def test_wf_state_cachelocations_updateinp(plugin, tmpdir): +def test_wf_state_cachelocations_updateinp(plugin, tmp_path): """ Two identical wfs (with states) with provided cache_dir; - the second wf has cache_locations and should not recompute the results + the second worky has cache_locations and should not recompute the results (the lazy input of the node is updated to the correct one, - i.e. the same as in wf1, after adding the node to the wf) + i.e. the same as in worky1, after adding the node to the worky) """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) - wf1.plugin = plugin + @workflow.define + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky1 = Worky1().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 - results1 = wf1.result() - assert results1[0].output.out == 8 - assert results1[1].output.out == 82 + assert results1.outputs.out[0] == 8 + assert results1.outputs.out[1] == 82 - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) - wf2.plugin = plugin - wf2.mult.inputs.y = wf2.lzin.y + @workflow.define + def Worky2(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky2 = Worky2().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(worky2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 - results2 = wf2.result() - assert results2[0].output.out == 8 - assert results2[1].output.out == 82 + assert results2.outputs.out[0] == 8 + assert results2.outputs.out[1] == 82 # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -3358,113 +2956,103 @@ def test_wf_state_cachelocations_updateinp(plugin, tmpdir): assert t2 < max(1, t1 - 1) # checking all directories - assert wf1.output_dir - for odir in wf1.output_dir: - assert odir.exists() - # checking if the second wf didn't run again + + # checking if the second worky didn't run again # checking all directories - assert wf2.output_dir - for odir in wf2.output_dir: - assert not odir.exists() @pytest.mark.flaky(reruns=3) -def test_wf_state_n_nostate_cachelocations(plugin, tmpdir): +def test_wf_state_n_nostate_cachelocations(plugin, tmp_path): """ Two wfs with provided cache_dir, the first one has no state, the second has; - the second wf has cache_locations and should not recompute only one element - """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") - - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin - - with Submitter(plugin=plugin) as sub: - sub(wf1) - - results1 = wf1.result() - assert results1.output.out == 8 - - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) - wf2.plugin = plugin + the second worky has cache_locations and should not recompute only one element + """ + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") + + @workflow.define + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky1 = Worky1(x=2, y=3) + + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) + + assert results1.outputs.out == 8 + + @workflow.define + def Worky2(x, y): + + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out - with Submitter(plugin=plugin) as sub: - sub(wf2) + worky2 = Worky2().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) - results2 = wf2.result() - assert results2[0].output.out == 8 - assert results2[1].output.out == 82 + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(worky2) - # checking the directory from the first wf - assert wf1.output_dir.exists() - # checking directories from the second wf, only second element should be recomputed - assert not wf2.output_dir[0].exists() - assert wf2.output_dir[1].exists() + assert not results2.errored, "\n".join(results2.errors["error message"]) + assert results2.outputs.out[0] == 8 + assert results2.outputs.out[1] == 82 -def test_wf_nostate_cachelocations_updated(plugin, tmpdir): + +def test_wf_nostate_cachelocations_updated(plugin, tmp_path): """ Two identical wfs with provided cache_dir; - the second wf has cache_locations in init, + the second worky has cache_locations in init, that is later overwritten in Submitter.__call__; the cache_locations from call doesn't exist so the second 
task should run again
    """
-    cache_dir1 = tmpdir.mkdir("test_wf_cache3")
-    cache_dir1_empty = tmpdir.mkdir("test_wf_cache3_empty")
-    cache_dir2 = tmpdir.mkdir("test_wf_cache4")
+    cache_dir1 = tmp_path / "test_wf_cache3"
+    cache_dir1.mkdir()
+    cache_dir1_empty = tmp_path / "test_wf_cache3_empty"
+    cache_dir1_empty.mkdir()
+    cache_dir2 = tmp_path / "test_wf_cache4"
+    cache_dir2.mkdir()
+
+    @workflow.define
+    def Worky1(x, y):
+        mult = workflow.add(Multiply(x=x, y=y), name="mult")
+        add2 = workflow.add(Add2Wait(x=mult.out), name="add2")
+        return add2.out

-    wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1)
-    wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y))
-    wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out))
-    wf1.set_output([("out", wf1.add2.lzout.out)])
-    wf1.inputs.x = 2
-    wf1.inputs.y = 3
-    wf1.plugin = plugin
+    worky1 = Worky1(x=2, y=3)

    t0 = time.time()
-    with Submitter(plugin=plugin) as sub:
-        sub(wf1)
+    with Submitter(worker=plugin, cache_dir=cache_dir1) as sub:
+        results1 = sub(worky1)
+
+    assert not results1.errored, "\n".join(results1.errors["error message"])
    t1 = time.time() - t0

-    results1 = wf1.result()
-    assert 8 == results1.output.out
+    assert 8 == results1.outputs.out

-    wf2 = Workflow(
-        name="wf",
-        input_spec=["x", "y"],
-        cache_dir=cache_dir2,
-        cache_locations=cache_dir1,
-    )
-    wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y))
-    wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out))
-    wf2.set_output([("out", wf2.add2.lzout.out)])
-    wf2.inputs.x = 2
-    wf2.inputs.y = 3
-    wf2.plugin = plugin
+    @workflow.define
+    def Worky2(x, y):
+        mult = workflow.add(Multiply(x=x, y=y), name="mult")
+        add2 = workflow.add(Add2Wait(x=mult.out), name="add2")
+        return add2.out
+
+    worky2 = Worky2(x=2, y=3)

    t0 = time.time()
    # changing cache_locations to non-existing dir
-    with Submitter(plugin=plugin) as sub:
-        sub(wf2, cache_locations=cache_dir1_empty)
+    with Submitter(
+        worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1_empty
+    ) as sub:
+        results2 = sub(worky2)
+
+    assert not results2.errored, "\n".join(results2.errors["error message"])
    t2 = time.time() - t0

-    results2 = wf2.result()
-    assert 8 == results2.output.out
+    assert 8 == results2.outputs.out

    # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -3472,113 +3060,111 @@ def test_wf_nostate_cachelocations_updated(plugin, tmpdir): assert t1 > 2 assert t2 > 2 - # checking if both wf run - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + # checking if both worky run + assert results1.output_dir != results2.output_dir @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations_recompute(plugin, tmpdir): +def test_wf_nostate_cachelocations_recompute(plugin, tmp_path): """ Two wfs with the same inputs but slightly different graph; - the second wf should recompute the results, - but the second node should use the results from the first wf (has the same input) - """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") - - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin - - with Submitter(plugin=plugin) as sub: - sub(wf1) - - results1 = wf1.result() - assert 8 == results1.output.out - - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - # different argument assignment - wf2.add(multiply(name="mult", x=wf2.lzin.y, y=wf2.lzin.x)) - wf2.add(add2(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 - wf2.plugin = plugin + the second worky should recompute the results, + but the second node should use the results from the first worky (has the same input) + """ + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") + + @workflow.define + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") + return add2.out + + worky1 = Worky1(x=2, y=3) + + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) + + assert 8 == results1.outputs.out + + @workflow.define + def Worky2(x, y): + + # different argument assignment + mult = workflow.add(Multiply(x=y, y=x), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") + return add2.out - with Submitter(plugin=plugin) as sub: - sub(wf2) + worky2 = Worky2(x=2, y=3) - results2 = wf2.result() - assert 8 == results2.output.out + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(worky2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) + + assert 8 == results2.outputs.out # checking if both dir exists - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + assert results1.output_dir != results2.output_dir - # the second wf should have only one task run + # the second worky should have only one task run assert len(list(Path(cache_dir1).glob("F*"))) == 2 assert len(list(Path(cache_dir2).glob("F*"))) == 1 @pytest.mark.flaky(reruns=3) -def test_wf_ndstate_cachelocations(plugin, tmpdir): +def test_wf_ndstate_cachelocations(plugin, tmp_path): """ Two wfs with identical inputs and node states; - the second wf has cache_locations and should not recompute the results + the second worky has cache_locations and should not recompute the results """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - 
cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add( - multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) - ) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = [2, 20] - wf1.inputs.y = [3, 4] - wf1.plugin = plugin + @workflow.define + def Worky1(x, y): + mult = workflow.add( + Multiply().split(splitter=("x", "y"), x=x, y=y), name="mult" + ) + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky1 = Worky1(x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 - results1 = wf1.result() - assert results1.output.out == [8, 82] + assert results1.outputs.out == [8, 82] - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add( - multiply(name="mult").split(splitter=("x", "y"), x=wf2.lzin.x, y=wf2.lzin.y) - ) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = [2, 20] - wf2.inputs.y = [3, 4] - wf2.plugin = plugin + @workflow.define + def Worky2(x, y): + + mult = workflow.add( + Multiply().split(splitter=("x", "y"), x=x, y=y), name="mult" + ) + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky2 = Worky2(x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(worky2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 - results2 = wf2.result() - assert results2.output.out == [8, 82] + assert results2.outputs.out == [8, 82] # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -3586,64 +3172,55 @@ def test_wf_ndstate_cachelocations(plugin, tmpdir): assert t1 > 2 assert t2 < max(1, t1 - 1) - # checking all directories - assert wf1.output_dir.exists() - - # checking if the second wf didn't run again - # checking all directories - assert not wf2.output_dir.exists() - @pytest.mark.flaky(reruns=3) -def test_wf_ndstate_cachelocations_forcererun(plugin, tmpdir): +def test_wf_ndstate_cachelocations_forcererun(plugin, tmp_path): """ Two wfs with identical inputs and node states; - the second wf has cache_locations, + the second worky has cache_locations, but submitter is called with rerun=True, so should recompute """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add( - multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) - ) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = [2, 20] - wf1.inputs.y = [3, 4] - wf1.plugin = plugin + @workflow.define + def Worky1(x, y): + mult = workflow.add( + Multiply().split(splitter=("x", "y"), x=x, y=y), name="mult" + ) + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky1 = Worky1(x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 - results1 = wf1.result() - assert results1.output.out == [8, 82] + assert results1.outputs.out == [8, 82] - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add( - multiply(name="mult").split(splitter=("x", "y"), x=wf2.lzin.x, y=wf2.lzin.y) - ) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = [2, 20] - wf2.inputs.y = [3, 4] - wf2.plugin = plugin + @workflow.define + def Worky2(x, y): + + mult = workflow.add( + Multiply().split(splitter=("x", "y"), x=x, y=y), name="mult" + ) + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky2 = Worky2(x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf2, rerun=True) + with Submitter(worker=plugin, cache_dir=cache_dir2) as sub: + results2 = sub(worky2, rerun=True) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 - results2 = wf2.result() - assert results2.output.out == [8, 82] + assert results2.outputs.out == [8, 82] # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -3652,60 +3229,59 @@ def test_wf_ndstate_cachelocations_forcererun(plugin, tmpdir): assert t2 > 2 # checking all directories - assert wf1.output_dir.exists() - # checking if the second wf run again - assert wf2.output_dir.exists() + # checking if the second worky run again @pytest.mark.flaky(reruns=3) -def test_wf_ndstate_cachelocations_updatespl(plugin, tmpdir): +def test_wf_ndstate_cachelocations_updatespl(plugin, tmp_path): """ Two wfs with identical inputs and node state (that is set after adding the node!); - the second wf has cache_locations and should not recompute the results + the second worky has cache_locations and should not recompute the results """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add( - multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) - ) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = [2, 20] - wf1.inputs.y = [3, 4] - wf1.plugin = plugin + @workflow.define + def Worky1(x, y): + mult = workflow.add( + Multiply().split(splitter=("x", "y"), x=x, y=y), name="mult" + ) + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky1 = Worky1(x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 - results1 = wf1.result() - assert results1.output.out == [8, 82] + assert results1.outputs.out == [8, 82] - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(multiply(name="mult")) - wf2.mult.split(splitter=("x", "y"), x=wf2.lzin.x, y=wf2.lzin.y) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = [2, 20] - wf2.inputs.y = [3, 4] - wf2.plugin = plugin + @workflow.define + def Worky2(x, y): + + mult = workflow.add( + Multiply().split(splitter=("x", "y"), x=x, y=y), name="mult" + ) + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky2 = Worky2(x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(worky2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 - results2 = wf2.result() - assert results2.output.out == [8, 82] + assert results2.outputs.out == [8, 82] # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -3714,62 +3290,60 @@ def test_wf_ndstate_cachelocations_updatespl(plugin, tmpdir): assert t2 < max(1, t1 - 1) # checking all directories - assert wf1.output_dir.exists() - # checking if the second wf didn't run again + # checking if the second worky didn't run again # checking all directories - assert not wf2.output_dir.exists() @pytest.mark.flaky(reruns=3) -def test_wf_ndstate_cachelocations_recompute(plugin, tmpdir): +def test_wf_ndstate_cachelocations_recompute(plugin, tmp_path): """ Two wfs (with nodes with states) with provided cache_dir; - the second wf has cache_locations and should not recompute the results + the second worky has cache_locations and should not recompute the results """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add( - multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) - ) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = [2, 20] - wf1.inputs.y = [3, 4] - wf1.plugin = plugin + @workflow.define + def Worky1(x, y): + mult = workflow.add( + Multiply().split(splitter=("x", "y"), x=x, y=y), name="mult" + ) + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky1 = Worky1(x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 - results1 = wf1.result() - assert results1.output.out == [8, 82] + assert results1.outputs.out == [8, 82] - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add( - multiply(name="mult").split(splitter=["x", "y"], x=wf2.lzin.x, y=wf2.lzin.y) - ) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = [2, 20] - wf2.inputs.y = [3, 4] - wf2.plugin = plugin + @workflow.define + def Worky2(x, y): + + mult = workflow.add( + Multiply().split(splitter=["x", "y"], x=x, y=y), name="mult" + ) + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out + + worky2 = Worky2(x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(worky2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 - results2 = wf2.result() - assert results2.output.out == [8, 10, 62, 82] + assert results2.outputs.out == [8, 10, 62, 82] # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -3778,52 +3352,51 @@ def test_wf_ndstate_cachelocations_recompute(plugin, tmpdir): assert t2 > 2 # checking all directories - assert wf1.output_dir.exists() - # checking if the second wf didn't run again + # checking if the second worky didn't run again # checking all directories - assert wf2.output_dir.exists() @pytest.mark.flaky(reruns=3) -def test_wf_nostate_runtwice_usecache(plugin, tmpdir): +def test_wf_nostate_runtwice_usecache(plugin, tmp_path): """ running workflow (without state) twice, the second run should use the results from the first one """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + + @workflow.define + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") + return add2.out - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + worky1 = Worky1(x=2, y=3) t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out # checkoing output_dir after the first run - assert wf1.output_dir.exists() # saving the content of the cache dit after the first run - cache_dir_content = os.listdir(wf1.cache_dir) + cache_dir_content = os.listdir(worky1.cache_dir) # running workflow the second time t0 = time.time() - with Submitter(plugin=plugin) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(worky1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t2 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out # checking if no new directory is created - assert cache_dir_content == os.listdir(wf1.cache_dir) + assert cache_dir_content == os.listdir(worky1.cache_dir) # for win and dask/slurm the time for dir creation etc. 
might take much longer
    if not sys.platform.startswith("win") and plugin == "cf":
@@ -3832,46 +3405,49 @@ def test_wf_nostate_runtwice_usecache(plugin, tmpdir):
        assert t2 < max(1, t1 - 1)


-def test_wf_state_runtwice_usecache(plugin, tmpdir):
+def test_wf_state_runtwice_usecache(plugin, tmp_path):
    """
    running workflow with a state twice,
    the second run should use the results from the first one
    """
-    cache_dir1 = tmpdir.mkdir("test_wf_cache3")
+    cache_dir1 = tmp_path / "test_wf_cache3"
+    cache_dir1.mkdir()

-    wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1)
-    wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y))
-    wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out))
-    wf1.set_output([("out", wf1.add2.lzout.out)])
-    wf1.split(splitter=("x", "y"), x=[2, 20], y=[3, 30])
-    wf1.plugin = plugin
+    @workflow.define
+    def Worky1(x, y):
+        mult = workflow.add(Multiply(x=x, y=y), name="mult")
+        add2 = workflow.add(Add2Wait(x=mult.out), name="add2")
+        return add2.out
+
+    worky1 = Worky1().split(splitter=("x", "y"), x=[2, 20], y=[3, 30])

    t0 = time.time()
-    with Submitter(plugin=plugin) as sub:
-        sub(wf1)
+    with Submitter(worker=plugin, cache_dir=cache_dir1) as sub:
+        results1 = sub(worky1)
+
+    assert not results1.errored, "\n".join(results1.errors["error message"])
    t1 = time.time() - t0

-    results1 = wf1.result()
-    assert 8 == results1[0].output.out
-    assert 602 == results1[1].output.out
+    assert 8 == results1.outputs.out[0]
+    assert 602 == results1.outputs.out[1]

    # checkoing output_dir after the first run
-    assert [odir.exists() for odir in wf1.output_dir]
+    assert [odir.exists() for odir in worky1.output_dir]
    # saving the content of the cache dit after the first run
-    cache_dir_content = os.listdir(wf1.cache_dir)
+    cache_dir_content = os.listdir(worky1.cache_dir)

    # running workflow the second time
    t0 = time.time()
-    with Submitter(plugin=plugin) as sub:
-        sub(wf1)
+    with Submitter(worker=plugin, cache_dir=cache_dir1) as sub:
+        results1 = sub(worky1)
+
+    assert not results1.errored, "\n".join(results1.errors["error message"])
    t2 = time.time() - t0

-    results1 = wf1.result()
-    assert 8 == results1[0].output.out
-    assert 602 == results1[1].output.out
+    assert 8 == results1.outputs.out[0]
+    assert 602 == results1.outputs.out[1]
    # checking if no new directory is created
-    assert cache_dir_content == os.listdir(wf1.cache_dir)
+    assert cache_dir_content == os.listdir(worky1.cache_dir)

    # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": # checking the execution time @@ -3881,589 +3457,459 @@ def test_wf_state_runtwice_usecache(plugin, tmpdir): @pytest.fixture def create_tasks(): - wf = Workflow(name="wf", input_spec=["x"]) - wf.inputs.x = 1 - wf.add(add2(name="t1", x=wf.lzin.x)) - wf.add(multiply(name="t2", x=wf.t1.lzout.out, y=2)) - wf.set_output([("out", wf.t2.lzout.out)]) - t1 = wf.name2obj["t1"] - t2 = wf.name2obj["t2"] - return wf, t1, t2 + @workflow.define + def Worky(x): + t1 = workflow.add(Add2(x=x), name="t1") + t2 = workflow.add(Multiply(x=t1.out, y=2), name="t2") + return t2.out + + worky = Worky(x=1) + workflow_obj = pydra.engine.core.Workflow.construct(worky) + t1 = workflow_obj["t1"] + t2 = workflow_obj["t2"] + return worky, t1, t2 -def test_cache_propagation1(tmpdir, create_tasks): +def test_cache_propagation1(tmp_path, create_tasks): """No cache set, all independent""" - wf, t1, t2 = create_tasks - wf(plugin="cf") - assert wf.cache_dir == t1.cache_dir == t2.cache_dir - wf.cache_dir = (tmpdir / "shared").strpath - wf(plugin="cf") - assert wf.cache_dir == t1.cache_dir == t2.cache_dir + worky, t1, t2 = create_tasks + worky(plugin="cf") + assert wf["cache_dir"] == t1.cache_dir == t2.cache_dir + worky.cache_dir = (tmp_path / "shared").strpath + worky(plugin="cf") + assert wf["cache_dir"] == t1.cache_dir == t2.cache_dir -def test_cache_propagation2(tmpdir, create_tasks): +def test_cache_propagation2(tmp_path, create_tasks): """Task explicitly states no inheriting""" - wf, t1, t2 = create_tasks - wf.cache_dir = (tmpdir / "shared").strpath + worky, t1, t2 = create_tasks + worky.cache_dir = (tmp_path / "shared").strpath t2.allow_cache_override = False - wf(plugin="cf") - assert wf.cache_dir == t1.cache_dir != t2.cache_dir + worky(plugin="cf") + assert wf["cache_dir"] == t1.cache_dir != t2.cache_dir -def test_cache_propagation3(tmpdir, create_tasks): +def test_cache_propagation3(tmp_path, create_tasks): """Shared cache_dir with state""" - wf, t1, t2 = create_tasks - wf.split("x", x=[1, 2]) - wf.cache_dir = (tmpdir / "shared").strpath - wf(plugin="cf") - assert wf.cache_dir == t1.cache_dir == t2.cache_dir - - -def test_workflow_combine1(tmpdir): - wf1 = Workflow(name="wf1", input_spec=["a", "b"], a=[1, 2], b=[2, 3]) - wf1.add(power(name="power").split(["a", "b"], a=wf1.lzin.a, b=wf1.lzin.b)) - wf1.add(identity(name="identity1", x=wf1.power.lzout.out).combine("power.a")) - wf1.add(identity(name="identity2", x=wf1.identity1.lzout.out).combine("power.b")) - wf1.set_output( - { - "out_pow": wf1.power.lzout.out, - "out_iden1": wf1.identity1.lzout.out, - "out_iden2": wf1.identity2.lzout.out, - } - ) - wf1.cache_dir = tmpdir - result = wf1() - - assert result.output.out_pow == [1, 1, 4, 8] - assert result.output.out_iden1 == [[1, 4], [1, 8]] - assert result.output.out_iden2 == [[1, 4], [1, 8]] - - -def test_workflow_combine2(tmpdir): - wf1 = Workflow(name="wf1", input_spec=["a", "b"], a=[1, 2], b=[2, 3]) - wf1.add( - power(name="power").split(["a", "b"], a=wf1.lzin.a, b=wf1.lzin.b).combine("a") - ) - wf1.add(identity(name="identity", x=wf1.power.lzout.out).combine("power.b")) - wf1.set_output({"out_pow": wf1.power.lzout.out, "out_iden": wf1.identity.lzout.out}) - wf1.cache_dir = tmpdir - result = wf1() - - assert result.output.out_pow == [[1, 4], [1, 8]] - assert result.output.out_iden == [[1, 4], [1, 8]] - - -# testing lzout.all to collect all of the results and let FunctionTask deal with it - - -def test_wf_lzoutall_1(plugin, tmpdir): - 
"""workflow with 2 tasks, no splitter - passing entire result object to add2_sub2_res function - by using lzout.all syntax - """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2_sub2_res(name="add_sub", res=wf.mult.lzout.all_)) - wf.set_output([("out", wf.add_sub.lzout.out_add)]) - wf.inputs.x = 2 - wf.inputs.y = 3 - wf.cache_dir = tmpdir + worky, t1, t2 = create_tasks + worky = wf["split"]("x", x=[1, 2]) + worky.cache_dir = (tmp_path / "shared").strpath + worky(plugin="cf") + assert wf["cache_dir"] == t1.cache_dir == t2.cache_dir + + +def test_workflow_combine1(tmp_path): + @workflow.define(outputs=["out_pow", "out_iden1", "out_iden2"]) + def Worky1(a, b): + power = workflow.add(Power().split(["a", "b"], a=a, b=b)) + identity1 = workflow.add( + Identity(x=power.out).combine("power.a"), name="identity1" + ) + identity2 = workflow.add( + Identity(x=identity1.out).combine("power.b"), name="identity2" + ) + return power.out, identity1.out, identity2.out - with Submitter(plugin=plugin) as sub: - sub(wf) + worky1 = Worky1(a=[1, 2], b=[2, 3]) + outputs = worky1() - assert wf.output_dir.exists() - results = wf.result() - assert 8 == results.output.out + assert outputs.out_pow == [1, 1, 4, 8] + assert outputs.out_iden1 == [[1, 4], [1, 8]] + assert outputs.out_iden2 == [[1, 4], [1, 8]] -def test_wf_lzoutall_1a(plugin, tmpdir): - """workflow with 2 tasks, no splitter - passing entire result object to add2_res function - by using lzout.all syntax in the node connections and for wf output - """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2_sub2_res(name="add_sub", res=wf.mult.lzout.all_)) - wf.set_output([("out_all", wf.add_sub.lzout.all_)]) - wf.inputs.x = 2 - wf.inputs.y = 3 - wf.cache_dir = tmpdir +def test_workflow_combine2(tmp_path): + @workflow.define(outputs=["out_pow", "out_iden"]) + def Worky1(a, b): + power = workflow.add(Power().split(["a", "b"], a=a, b=b).combine("a")) + identity = workflow.add(Identity(x=power.out).combine("power.b")) + return power.out, identity.out - with Submitter(plugin=plugin) as sub: - sub(wf) + worky1 = Worky1(a=[1, 2], b=[2, 3]) + outputs = worky1(cache_dir=tmp_path) - assert wf.output_dir.exists() - results = wf.result() - assert results.output.out_all == {"out_add": 8, "out_sub": 4} + assert outputs.out_pow == [[1, 4], [1, 8]] + assert outputs.out_iden == [[1, 4], [1, 8]] -def test_wf_lzoutall_st_1(plugin, tmpdir): - """workflow with 2 tasks, no splitter - passing entire result object to add2_res function - by using lzout.all syntax - """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2_sub2_res(name="add_sub", res=wf.mult.lzout.all_)) - wf.set_output([("out_add", wf.add_sub.lzout.out_add)]) - wf.inputs.x = [2, 20] - wf.inputs.y = [3, 30] - wf.plugin = plugin - wf.cache_dir = tmpdir +def test_wf_resultfile_1(plugin, tmp_path): + """workflow with a file in the result, file should be copied to the worky dir""" - with Submitter(plugin=plugin) as sub: - sub(wf) + @workflow.define + def Worky(x): + writefile = workflow.add(FunWriteFile(filename=x)) - assert wf.output_dir.exists() - results = wf.result() - assert results.output.out_add == [8, 62, 62, 602] + return writefile.out # wf_out + worky = Worky(x="file_1.txt") + outputs = worky(worker=plugin, cache_dir=tmp_path) -def test_wf_lzoutall_st_1a(plugin, tmpdir): - 
"""workflow with 2 tasks, no splitter - passing entire result object to add2_res function - by using lzout.all syntax - """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2_sub2_res(name="add_sub", res=wf.mult.lzout.all_)) - wf.set_output([("out_all", wf.add_sub.lzout.all_)]) - wf.inputs.x = [2, 20] - wf.inputs.y = [3, 30] - wf.plugin = plugin - wf.cache_dir = tmpdir - - with Submitter(plugin=plugin) as sub: - sub(wf) - - assert wf.output_dir.exists() - results = wf.result() - assert results.output.out_all == [ - {"out_add": 8, "out_sub": 4}, - {"out_add": 62, "out_sub": 58}, - {"out_add": 62, "out_sub": 58}, - {"out_add": 602, "out_sub": 598}, - ] + # checking if the file exists and if it is in the Worky directory + wf_out = outputs.wf_out.fspath + wf_out.exists() + assert wf_out == wf["output_dir"] / "file_1.txt" -def test_wf_lzoutall_st_2(plugin, tmpdir): - """workflow with 2 tasks, no splitter - passing entire result object to add2_res function - by using lzout.all syntax - """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add( - multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y).combine("x") - ) - wf.add(add2_sub2_res_list(name="add_sub", res=wf.mult.lzout.all_)) - wf.set_output([("out_add", wf.add_sub.lzout.out_add)]) - wf.inputs.x = [2, 20] - wf.inputs.y = [3, 30] - wf.plugin = plugin - wf.cache_dir = tmpdir - - with Submitter(plugin=plugin) as sub: - sub(wf) - - assert wf.output_dir.exists() - results = wf.result() - assert results.output.out_add[0] == [8, 62] - assert results.output.out_add[1] == [62, 602] - - -@pytest.mark.xfail( - condition=bool(shutil.which("sbatch")), # using SLURM - reason=( - "Not passing on SLURM image for some reason, hoping upgrade of image/Python " - "version fixes it" - ), -) -def test_wf_lzoutall_st_2a(plugin, tmpdir): - """workflow with 2 tasks, no splitter - passing entire result object to add2_res function - by using lzout.all syntax +def test_wf_resultfile_2(plugin, tmp_path): + """workflow with a list of files in the worky result, + all files should be copied to the worky dir """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add( - multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y).combine("x") - ) - wf.add(add2_sub2_res_list(name="add_sub", res=wf.mult.lzout.all_)) - wf.set_output([("out_all", wf.add_sub.lzout.all_)]) - wf.inputs.x = [2, 20] - wf.inputs.y = [3, 30] - wf.plugin = plugin - wf.cache_dir = tmpdir - - with Submitter(plugin=plugin) as sub: - sub(wf) - - assert wf.output_dir.exists() - results = wf.result() - assert results.output.out_all == [ - {"out_add": [8, 62], "out_sub": [4, 58]}, - {"out_add": [62, 602], "out_sub": [58, 598]}, - ] + @workflow.define + def Worky(x): + writefile = workflow.add(FunWriteFileList(filename_list=x)) -# workflows that have files in the result, the files should be copied to the wf dir + return writefile.out # wf_out + file_list = ["file_1.txt", "file_2.txt", "file_3.txt"] + worky = Worky(x=file_list) + outputs = worky(worker=plugin, cache_dir=tmp_path) -def test_wf_resultfile_1(plugin, tmpdir): - """workflow with a file in the result, file should be copied to the wf dir""" - wf = Workflow(name="wf_file_1", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_write_file(name="writefile", filename=wf.lzin.x)) - wf.inputs.x = "file_1.txt" - wf.plugin = plugin - wf.set_output([("wf_out", wf.writefile.lzout.out)]) - - with Submitter(plugin=plugin) as sub: - sub(wf) - - 
results = wf.result() - # checking if the file exists and if it is in the Workflow directory - wf_out = results.output.wf_out.fspath - wf_out.exists() - assert wf_out == wf.output_dir / "file_1.txt" + # checking if the file exists and if it is in the Worky directory + for ii, file in enumerate(outputs.wf_out): + assert file.fspath.exists() + assert file.fspath == wf["output_dir"] / file_list[ii] -def test_wf_resultfile_2(plugin, tmpdir): - """workflow with a list of files in the wf result, - all files should be copied to the wf dir +def test_wf_resultfile_3(plugin, tmp_path): + """workflow with a dictionaries of files in the worky result, + all files should be copied to the worky dir """ - wf = Workflow(name="wf_file_1", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_write_file_list(name="writefile", filename_list=wf.lzin.x)) - file_list = ["file_1.txt", "file_2.txt", "file_3.txt"] - wf.inputs.x = file_list - wf.plugin = plugin - wf.set_output([("wf_out", wf.writefile.lzout.out)]) - with Submitter(plugin=plugin) as sub: - sub(wf) - - results = wf.result() - # checking if the file exists and if it is in the Workflow directory - for ii, file in enumerate(results.output.wf_out): - assert file.fspath.exists() - assert file.fspath == wf.output_dir / file_list[ii] + @workflow.define + def Worky(x): + writefile = workflow.add(FunWriteFileList2Dict(filename_list=x)) + return writefile.out # wf_out -def test_wf_resultfile_3(plugin, tmpdir): - """workflow with a dictionaries of files in the wf result, - all files should be copied to the wf dir - """ - wf = Workflow(name="wf_file_1", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_write_file_list2dict(name="writefile", filename_list=wf.lzin.x)) file_list = ["file_1.txt", "file_2.txt", "file_3.txt"] - wf.inputs.x = file_list - wf.plugin = plugin - wf.set_output([("wf_out", wf.writefile.lzout.out)]) - - with Submitter(plugin=plugin) as sub: - sub(wf) + worky = Worky(x=file_list) + outputs = worky(worker=plugin, cache_dir=tmp_path) - results = wf.result() - # checking if the file exists and if it is in the Workflow directory - for key, val in results.output.wf_out.items(): + # checking if the file exists and if it is in the Worky directory + for key, val in outputs.wf_out.items(): if key == "random_int": assert val == 20 else: assert val.fspath.exists() ii = int(key.split("_")[1]) - assert val.fspath == wf.output_dir / file_list[ii] + assert val.fspath == wf["output_dir"] / file_list[ii] -def test_wf_upstream_error1(plugin, tmpdir): +def test_wf_upstream_error1(plugin, tmp_path): """workflow with two tasks, task2 dependent on an task1 which raised an error""" - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) - wf.inputs.x = "hi" # TypeError for adding str and int - wf.plugin = plugin - wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) - wf.set_output([("out", wf.addvar2.lzout.out)]) + + @workflow.define + def Worky(x): + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") + return addvar2.out + + worky = Worky(x="hi") # TypeError for adding str and int with pytest.raises(ValueError) as excinfo: - with Submitter(plugin=plugin) as sub: - sub(wf) + worky(worker=plugin, cache_dir=tmp_path) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) -def test_wf_upstream_error2(plugin, tmpdir): +def test_wf_upstream_error2(plugin, 
tmp_path): """task2 dependent on task1, task1 errors, workflow-level split on task 1 goal - workflow finish running, one output errors but the other doesn't """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) - wf.split("x", x=[1, "hi"]) # workflow-level split TypeError for adding str and int - wf.plugin = plugin - wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) - wf.set_output([("out", wf.addvar2.lzout.out)]) + + @workflow.define + def Worky(x): + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") + + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") + return addvar2.out + + worky = Worky().split( + "x", x=[1, "hi"] + ) # workflow-level split TypeError for adding str and int with pytest.raises(Exception) as excinfo: - with Submitter(plugin=plugin) as sub: - sub(wf) + worky(worker=plugin, cache_dir=tmp_path) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) @pytest.mark.flaky(reruns=2) # when slurm -def test_wf_upstream_error3(plugin, tmpdir): +def test_wf_upstream_error3(plugin, tmp_path): """task2 dependent on task1, task1 errors, task-level split on task 1 goal - workflow finish running, one output errors but the other doesn't """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default_notype(name="addvar1")) - wf.inputs.x = [1, "hi"] # TypeError for adding str and int - wf.addvar1.split("a", a=wf.lzin.x) # task-level split - wf.plugin = plugin - wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) - wf.set_output([("out", wf.addvar2.lzout.out)]) + @workflow.define + def Worky(x): + addvar1 = workflow.add(FunAddVarDefaultNoType().split("a", a=x), name="addvar1") + + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") + return addvar2.out + + worky = Worky(x=[1, "hi"]) # TypeError for adding str and int with pytest.raises(Exception) as excinfo: - with Submitter(plugin=plugin) as sub: - sub(wf) + worky(worker=plugin, cache_dir=tmp_path) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) -def test_wf_upstream_error4(plugin, tmpdir): +def test_wf_upstream_error4(plugin, tmp_path): """workflow with one task, which raises an error""" - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) - wf.inputs.x = "hi" # TypeError for adding str and int - wf.plugin = plugin - wf.set_output([("out", wf.addvar1.lzout.out)]) + @workflow.define + def Worky(x): + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) + + return addvar1.out + + worky = Worky(x="hi") # TypeError for adding str and int with pytest.raises(Exception) as excinfo: - with Submitter(plugin=plugin) as sub: - sub(wf) + worky(worker=plugin, cache_dir=tmp_path) assert "raised an error" in str(excinfo.value) assert "addvar1" in str(excinfo.value) -def test_wf_upstream_error5(plugin, tmpdir): +def test_wf_upstream_error5(plugin, tmp_path): """nested workflow with one task, which raises an error""" - wf_main = Workflow(name="wf_main", input_spec=["x"], cache_dir=tmpdir) - wf = Workflow(name="wf", input_spec=["x"], x=wf_main.lzin.x) - wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) - wf.plugin = plugin - wf.set_output([("wf_out", wf.addvar1.lzout.out)]) - wf_main.add(wf) - wf_main.inputs.x = "hi" # TypeError for adding str and int - wf_main.set_output([("out", 
wf_main.wf.lzout.wf_out)])
+    @workflow.define
+    def Worky(x):
+        addvar1 = workflow.add(FunAddVarDefaultNoType(a=x))
+        return addvar1.out  # wf_out
+
+    @workflow.define
+    def WfMain(x):
+        worky = workflow.add(Worky(x=x))
+        return worky.out
+
+    wf_main = WfMain(x="hi")  # TypeError for adding str and int
 
     with pytest.raises(Exception) as excinfo:
-        with Submitter(plugin=plugin) as sub:
+        with Submitter(worker=plugin, cache_dir=tmp_path) as sub:
             sub(wf_main)
 
     assert "addvar1" in str(excinfo.value)
     assert "raised an error" in str(excinfo.value)
 
 
-def test_wf_upstream_error6(plugin, tmpdir):
+def test_wf_upstream_error6(plugin, tmp_path):
     """nested workflow with two tasks, the first one raises an error"""
-    wf_main = Workflow(name="wf_main", input_spec=["x"], cache_dir=tmpdir)
-    wf = Workflow(name="wf", input_spec=["x"], x=wf_main.lzin.x)
-    wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x))
-    wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out))
-    wf.plugin = plugin
-    wf.set_output([("wf_out", wf.addvar2.lzout.out)])
-    wf_main.add(wf)
-    wf_main.inputs.x = "hi"  # TypeError for adding str and int
-    wf_main.set_output([("out", wf_main.wf.lzout.wf_out)])
+    @workflow.define
+    def Worky(x):
+        addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1")
+        addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2")
+
+        return addvar2.out  # wf_out
+
+    @workflow.define
+    def WfMain(x):
+        worky = workflow.add(Worky(x=x))
+        return worky.out
+
+    wf_main = WfMain(x="hi")  # TypeError for adding str and int
 
     with pytest.raises(Exception) as excinfo:
-        with Submitter(plugin=plugin) as sub:
+        with Submitter(worker=plugin, cache_dir=tmp_path) as sub:
             sub(wf_main)
 
     assert "addvar1" in str(excinfo.value)
     assert "raised an error" in str(excinfo.value)
 
 
-def test_wf_upstream_error7(plugin, tmpdir):
+def test_wf_upstream_error7(plugin, tmp_path):
     """
     workflow with three sequential tasks, the first task raises an error
     the last task is set as the workflow output
     """
-    wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir)
-    wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x))
-    wf.inputs.x = "hi"  # TypeError for adding str and int
-    wf.plugin = plugin
-    wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out))
-    wf.add(fun_addvar_default_notype(name="addvar3", a=wf.addvar2.lzout.out))
-    wf.set_output([("out", wf.addvar3.lzout.out)])
+
+    @workflow.define
+    def Worky(x):
+        addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1")
+
+        addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2")
+        addvar3 = workflow.add(FunAddVarDefaultNoType(a=addvar2.out), name="addvar3")
+        return addvar3.out
+
+    worky = Worky(x="hi")  # TypeError for adding str and int
 
     with pytest.raises(ValueError) as excinfo:
-        with Submitter(plugin=plugin) as sub:
-            sub(wf)
+        worky(worker=plugin, cache_dir=tmp_path)
     assert "addvar1" in str(excinfo.value)
     assert "raised an error" in str(excinfo.value)
-    assert wf.addvar1._errored is True
-    assert wf.addvar2._errored == wf.addvar3._errored == ["addvar1"]
+    assert wf["addvar1"]._errored is True
+    assert wf["addvar2"]._errored == wf["addvar3"]._errored == ["addvar1"]
 
 
-def test_wf_upstream_error7a(plugin, tmpdir):
+def test_wf_upstream_error7a(plugin, tmp_path):
     """
     workflow with three sequential tasks, the first task raises an error
     the second task is set as the workflow output
     """
-    wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir)
-    wf.add(fun_addvar_default_notype(name="addvar1",
a=wf.lzin.x)) - wf.inputs.x = "hi" # TypeError for adding str and int - wf.plugin = plugin - wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) - wf.add(fun_addvar_default_notype(name="addvar3", a=wf.addvar2.lzout.out)) - wf.set_output([("out", wf.addvar2.lzout.out)]) + @workflow.define + def Worky(x): + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") + + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") + addvar3 = workflow.add(FunAddVarDefaultNoType(a=addvar2.out), name="addvar3") + return addvar3.out + + worky = Worky(x="hi") # TypeError for adding str and int with pytest.raises(ValueError) as excinfo: - with Submitter(plugin=plugin) as sub: - sub(wf) + worky(worker=plugin, cache_dir=tmp_path) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) - assert wf.addvar1._errored is True - assert wf.addvar2._errored == wf.addvar3._errored == ["addvar1"] + assert wf["addvar1"]._errored is True + assert wf["addvar2"]._errored == wf["addvar3"]._errored == ["addvar1"] -def test_wf_upstream_error7b(plugin, tmpdir): +def test_wf_upstream_error7b(plugin, tmp_path): """ workflow with three sequential tasks, the first task raises an error the second and the third tasks are set as the workflow output """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) - wf.inputs.x = "hi" # TypeError for adding str and int - wf.plugin = plugin - wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) - wf.add(fun_addvar_default_notype(name="addvar3", a=wf.addvar2.lzout.out)) - wf.set_output([("out1", wf.addvar2.lzout.out), ("out2", wf.addvar3.lzout.out)]) + @workflow.define(outputs=["out1", "out2"]) + def Worky(x): + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") + + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") + addvar3 = workflow.add(FunAddVarDefaultNoType(a=addvar2.out), name="addvar3") + return addvar2.out, addvar3.out # + + worky = Worky(x="hi") # TypeError for adding str and int with pytest.raises(ValueError) as excinfo: - with Submitter(plugin=plugin) as sub: - sub(wf) + worky(worker=plugin, cache_dir=tmp_path) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) - assert wf.addvar1._errored is True - assert wf.addvar2._errored == wf.addvar3._errored == ["addvar1"] + assert wf["addvar1"]._errored is True + assert wf["addvar2"]._errored == wf["addvar3"]._errored == ["addvar1"] -def test_wf_upstream_error8(plugin, tmpdir): +def test_wf_upstream_error8(plugin, tmp_path): """workflow with three tasks, the first one raises an error, so 2 others are removed""" - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) - wf.inputs.x = "hi" # TypeError for adding str and int - wf.plugin = plugin - wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) - wf.add(fun_addtwo(name="addtwo", a=wf.addvar1.lzout.out)) - wf.set_output([("out1", wf.addvar2.lzout.out), ("out2", wf.addtwo.lzout.out)]) + @workflow.define(outputs=["out1", "out2"]) + def Worky(x): + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") + + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") + addtwo = workflow.add(FunAddTwo(a=addvar1.out)) + return addvar2.out, addtwo.out # + + worky = Worky(x="hi") # TypeError for adding str and int with 
pytest.raises(ValueError) as excinfo: - with Submitter(plugin=plugin) as sub: - sub(wf) + worky(worker=plugin, cache_dir=tmp_path) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) - assert wf.addvar1._errored is True - assert wf.addvar2._errored == wf.addtwo._errored == ["addvar1"] + assert wf["addvar1"]._errored is True + assert wf["addvar2"]._errored == wf["addtwo"]._errored == ["addvar1"] -def test_wf_upstream_error9(plugin, tmpdir): +def test_wf_upstream_error9(plugin, tmp_path): """ workflow with five tasks with two "branches", one branch has an error, the second is fine the errored branch is connected to the workflow output """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) - wf.inputs.x = 2 - wf.add(fun_addvar_notype(name="err", a=wf.addvar1.lzout.out, b="hi")) - wf.add(fun_addvar_default_notype(name="follow_err", a=wf.err.lzout.out)) - wf.add(fun_addtwo_notype(name="addtwo", a=wf.addvar1.lzout.out)) - wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addtwo.lzout.out)) - wf.set_output([("out1", wf.follow_err.lzout.out)]) + @workflow.define + def Worky(x): + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") + + err = workflow.add(FunAddVarNoType(a=addvar1.out, b="hi"), name="err") + follow_err = workflow.add(FunAddVarDefaultNoType(a=err.out), name="follow_err") - wf.plugin = plugin + addtwo = workflow.add(FunAddTwoNoType(a=addvar1.out), name="addtwo") + workflow.add(FunAddVarDefaultNoType(a=addtwo.out)) + return follow_err.out # out1 + + worky = Worky(x=2) with pytest.raises(ValueError) as excinfo: - with Submitter(plugin=plugin) as sub: - sub(wf) + worky(worker=plugin, cache_dir=tmp_path) assert "err" in str(excinfo.value) assert "raised an error" in str(excinfo.value) - assert wf.err._errored is True - assert wf.follow_err._errored == ["err"] + assert wf["err"]._errored is True + assert wf["follow_err"]._errored == ["err"] -def test_wf_upstream_error9a(plugin, tmpdir): +def test_wf_upstream_error9a(plugin, tmp_path): """ workflow with five tasks with two "branches", one branch has an error, the second is fine the branch without error is connected to the workflow output so the workflow finished clean """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default(name="addvar1", a=wf.lzin.x)) - wf.inputs.x = 2 - wf.add(fun_addvar_notype(name="err", a=wf.addvar1.lzout.out, b="hi")) - wf.add(fun_addvar_default(name="follow_err", a=wf.err.lzout.out)) - wf.add(fun_addtwo_notype(name="addtwo", a=wf.addvar1.lzout.out)) - wf.add(fun_addvar_default(name="addvar2", a=wf.addtwo.lzout.out)) - wf.set_output([("out1", wf.addvar2.lzout.out)]) # , ("out2", wf.addtwo.lzout.out)]) + @workflow.define + def Worky(x): + addvar1 = workflow.add(FunAddVarDefault(a=x), name="addvar1") + + err = workflow.add(FunAddVarNoType(a=addvar1.out, b="hi"), name="err") + workflow.add(FunAddVarDefault(a=err.out)) - wf.plugin = plugin - with Submitter(plugin=plugin) as sub: - sub(wf) - assert wf.err._errored is True - assert wf.follow_err._errored == ["err"] + addtwo = workflow.add(FunAddTwoNoType(a=addvar1.out), name="addtwo") + addvar2 = workflow.add(FunAddVarDefault(a=addtwo.out), name="addvar2") + return addvar2.out # out1 # , ("out2", addtwo.out)]) + worky = Worky(x=2) -def test_wf_upstream_error9b(plugin, tmpdir): + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + sub(worky) + assert wf["err"]._errored is True + assert 
wf["follow_err"]._errored == ["err"] + + +def test_wf_upstream_error9b(plugin, tmp_path): """ workflow with five tasks with two "branches", one branch has an error, the second is fine both branches are connected to the workflow output """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) - wf.inputs.x = 2 - wf.add(fun_addvar_notype(name="err", a=wf.addvar1.lzout.out, b="hi")) - wf.add(fun_addvar_default_notype(name="follow_err", a=wf.err.lzout.out)) - wf.add(fun_addtwo_notype(name="addtwo", a=wf.addvar1.lzout.out)) - wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addtwo.lzout.out)) - wf.set_output([("out1", wf.follow_err.lzout.out), ("out2", wf.addtwo.lzout.out)]) + @workflow.define(outputs=["out1", "out2"]) + def Worky(x): + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") + + err = workflow.add(FunAddVarNoType(a=addvar1.out, b="hi"), name="err") + follow_err = workflow.add(FunAddVarDefaultNoType(a=err.out), name="follow_err") + + addtwo = workflow.add(FunAddTwoNoType(a=addvar1.out), name="addtwo") + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addtwo.out), name="addvar2") + return follow_err.out, addvar2.out - wf.plugin = plugin + worky = Worky(x=2) with pytest.raises(ValueError) as excinfo: - with Submitter(plugin=plugin) as sub: - sub(wf) + worky(worker=plugin, cache_dir=tmp_path) assert "err" in str(excinfo.value) assert "raised an error" in str(excinfo.value) - assert wf.err._errored is True - assert wf.follow_err._errored == ["err"] + assert wf["err"]._errored is True + assert wf["follow_err"]._errored == ["err"] -def exporting_graphs(wf, name): +def exporting_graphs(worky, name, out_dir): """helper function to run dot to create png/pdf files from dotfiles""" # exporting the simple graph - dotfile_pr, formatted_dot = wf.create_dotfile(export=True, name=name) + dotfile_pr, formatted_dot = plot_workflow(worky, out_dir, export=True, name=name) assert len(formatted_dot) == 1 assert formatted_dot[0] == dotfile_pr.with_suffix(".png") assert formatted_dot[0].exists() print("\n png of a simple graph in: ", formatted_dot[0]) # exporting nested graph - dotfile_pr, formatted_dot = wf.create_dotfile( - type="nested", export=["pdf", "png"], name=f"{name}_nest" + dotfile_pr, formatted_dot = plot_workflow( + worky, out_dir, type="nested", export=["pdf", "png"], name=f"{name}_nest" ) assert len(formatted_dot) == 2 assert formatted_dot[0] == dotfile_pr.with_suffix(".pdf") assert formatted_dot[0].exists() print("\n pdf of the nested graph in: ", formatted_dot[0]) # detailed graph - dotfile_pr, formatted_dot = wf.create_dotfile( - type="detailed", export="pdf", name=f"{name}_det" + dotfile_pr, formatted_dot = plot_workflow( + worky, out_dir, type="detailed", export="pdf", name=f"{name}_det" ) assert len(formatted_dot) == 1 assert formatted_dot[0] == dotfile_pr.with_suffix(".pdf") @@ -4472,33 +3918,64 @@ def exporting_graphs(wf, name): @pytest.mark.parametrize("splitter", [None, "x"]) -def test_graph_1(tmpdir, splitter): - """creating a set of graphs, wf with two nodes""" - wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(multiply(name="mult_1", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(multiply(name="mult_2", x=wf.lzin.x, y=wf.lzin.x)) - wf.add(add2(name="add2", x=wf.mult_1.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.split(splitter, x=[1, 2]) +def test_graph_simple(tmp_path, splitter): + """creating a set of graphs, worky with two nodes""" + + 
@workflow.define + def Worky(x=1, y=2): + mult_1 = workflow.add(Multiply(x=x, y=y), name="mult_1") + workflow.add(Multiply(x=x, y=x), name="mult_2") + add2 = workflow.add(Add2(x=mult_1.out), name="add2") + return add2.out + + worky = Worky().split(splitter, x=[1, 2]) # simple graph - dotfile_s = wf.create_dotfile() + dotfile_s = plot_workflow(worky, tmp_path, name="simple") dotstr_s_lines = dotfile_s.read_text().split("\n") assert "mult_1" in dotstr_s_lines assert "mult_2" in dotstr_s_lines assert "add2" in dotstr_s_lines assert "mult_1 -> add2" in dotstr_s_lines + +@pytest.mark.parametrize("splitter", [None, "x"]) +def test_graph_nested(tmp_path, splitter): + """creating a set of graphs, worky with two nodes""" + + @workflow.define + def Worky(x=1, y=2): + mult_1 = workflow.add(Multiply(x=x, y=y), name="mult_1") + workflow.add(Multiply(x=x, y=x), name="mult_2") + add2 = workflow.add(Add2(x=mult_1.out), name="add2") + return add2.out + + worky = Worky().split(splitter, x=[1, 2]) + # nested graph (should have the same elements) - dotfile_n = wf.create_dotfile(type="nested") + dotfile_n = plot_workflow(worky, tmp_path, type="nested", name="nested") dotstr_n_lines = dotfile_n.read_text().split("\n") assert "mult_1" in dotstr_n_lines assert "mult_2" in dotstr_n_lines assert "add2" in dotstr_n_lines assert "mult_1 -> add2" in dotstr_n_lines + +@pytest.mark.parametrize("splitter", [None, "x"]) +def test_graph_detailed(tmp_path, splitter): + """creating a set of graphs, worky with two nodes""" + + @workflow.define + def Worky(x=1, y=2): + mult_1 = workflow.add(Multiply(x=x, y=y), name="mult_1") + workflow.add(Multiply(x=x, y=x), name="mult_2") + add2 = workflow.add(Add2(x=mult_1.out), name="add2") + return add2.out + + worky = Worky().split(splitter, x=[1, 2]) + # detailed graph - dotfile_d = wf.create_dotfile(type="detailed") + dotfile_d = plot_workflow(worky, tmp_path, type="detailed", name="detailed") dotstr_d_lines = dotfile_d.read_text().split("\n") assert ( 'struct_wf [color=red, label="{WORKFLOW INPUT: | { x | y}}"];' @@ -4506,24 +3983,41 @@ def test_graph_1(tmpdir, splitter): ) assert "struct_mult_1:out -> struct_add2:x;" in dotstr_d_lines - # exporting graphs if dot available - if DOT_FLAG: - name = f"graph_{sys._getframe().f_code.co_name}" - exporting_graphs(wf=wf, name=name) +@pytest.mark.skipif(not DOT_FLAG, reason="dot not available") +@pytest.mark.parametrize("splitter", [None, "x"]) +def test_graph_export_dot(tmp_path, splitter): + """creating a set of graphs, worky with two nodes""" + + @workflow.define + def Worky(x=1, y=2): + mult_1 = workflow.add(Multiply(x=x, y=y), name="mult_1") + workflow.add(Multiply(x=x, y=x), name="mult_2") + add2 = workflow.add(Add2(x=mult_1.out), name="add2") + return add2.out -def test_graph_1st(tmpdir): - """creating a set of graphs, wf with two nodes + worky = Worky().split(splitter, x=[1, 2]) + + name = f"graph_{sys._getframe().f_code.co_name}" + exporting_graphs(worky=worky, name=name, out_dir=tmp_path) + + +def test_graph_1st(tmp_path): + """creating a set of graphs, worky with two nodes some nodes have splitters, should be marked with blue color """ - wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(multiply(name="mult_1", y=wf.lzin.y).split("x", x=wf.lzin.x)) - wf.add(multiply(name="mult_2", x=wf.lzin.x, y=wf.lzin.x)) - wf.add(add2(name="add2", x=wf.mult_1.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) + + @workflow.define + def Worky(x, y): + mult_1 = workflow.add(Multiply(y=y).split("x", x=x), 
name="mult_1") + workflow.add(Multiply(x=x, y=x), name="mult_2") + add2 = workflow.add(Add2(x=mult_1.out), name="add2") + return add2.out + + worky = Worky(x=[1, 2], y=2) # simple graph - dotfile_s = wf.create_dotfile() + dotfile_s = plot_workflow(worky, out_dir=tmp_path) dotstr_s_lines = dotfile_s.read_text().split("\n") assert "mult_1 [color=blue]" in dotstr_s_lines assert "mult_2" in dotstr_s_lines @@ -4531,7 +4025,7 @@ def test_graph_1st(tmpdir): assert "mult_1 -> add2 [color=blue]" in dotstr_s_lines # nested graph - dotfile_n = wf.create_dotfile(type="nested") + dotfile_n = plot_workflow(worky, out_dir=tmp_path, type="nested") dotstr_n_lines = dotfile_n.read_text().split("\n") assert "mult_1 [color=blue]" in dotstr_n_lines assert "mult_2" in dotstr_n_lines @@ -4539,7 +4033,7 @@ def test_graph_1st(tmpdir): assert "mult_1 -> add2 [color=blue]" in dotstr_n_lines # detailed graph - dotfile_d = wf.create_dotfile(type="detailed") + dotfile_d = plot_workflow(worky, out_dir=tmp_path, type="detailed") dotstr_d_lines = dotfile_d.read_text().split("\n") assert ( 'struct_wf [color=red, label="{WORKFLOW INPUT: | { x | y}}"];' @@ -4549,21 +4043,25 @@ def test_graph_1st(tmpdir): if DOT_FLAG: name = f"graph_{sys._getframe().f_code.co_name}" - exporting_graphs(wf=wf, name=name) + exporting_graphs(worky=worky, name=name, out_dir=tmp_path) -def test_graph_1st_cmb(tmpdir): - """creating a set of graphs, wf with three nodes +def test_graph_1st_cmb(tmp_path): + """creating a set of graphs, worky with three nodes the first one has a splitter, the second has a combiner, so the third one is stateless first two nodes should be blue and the arrow between them should be blue """ - wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(multiply(name="mult", y=wf.lzin.y).split("x", x=wf.lzin.x)) - wf.add(add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) - wf.add(list_sum(name="sum", x=wf.add2.lzout.out)) - wf.set_output([("out", wf.sum.lzout.out)]) + + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply(y=y).split("x", x=x), name="mult") + add2 = workflow.add(Add2(x=mult.out).combine("mult.x"), name="add2") + sum = workflow.add(ListSum(x=add2.out), name="sum") + return sum.out + + worky = Worky(x=[1, 2], y=2) # simple graph - dotfile_s = wf.create_dotfile() + dotfile_s = plot_workflow(worky, out_dir=tmp_path) dotstr_s_lines = dotfile_s.read_text().split("\n") assert "mult [color=blue]" in dotstr_s_lines assert "add2 [color=blue]" in dotstr_s_lines @@ -4572,7 +4070,7 @@ def test_graph_1st_cmb(tmpdir): assert "add2 -> sum" in dotstr_s_lines # nested graph - dotfile_n = wf.create_dotfile(type="nested") + dotfile_n = plot_workflow(worky, out_dir=tmp_path, type="nested") dotstr_n_lines = dotfile_n.read_text().split("\n") assert "mult [color=blue]" in dotstr_n_lines assert "add2 [color=blue]" in dotstr_n_lines @@ -4581,7 +4079,7 @@ def test_graph_1st_cmb(tmpdir): assert "add2 -> sum" in dotstr_n_lines # detailed graph - dotfile_d = wf.create_dotfile(type="detailed") + dotfile_d = plot_workflow(worky, out_dir=tmp_path, type="detailed") dotstr_d_lines = dotfile_d.read_text().split("\n") assert ( 'struct_wf [color=red, label="{WORKFLOW INPUT: | { x | y}}"];' @@ -4591,31 +4089,37 @@ def test_graph_1st_cmb(tmpdir): if DOT_FLAG: name = f"graph_{sys._getframe().f_code.co_name}" - exporting_graphs(wf=wf, name=name) + exporting_graphs(worky=worky, name=name, out_dir=tmp_path) + +def test_graph_2(tmp_path): + """creating a graph, worky with one workflow as a node""" -def 
test_graph_2(tmpdir): - """creating a graph, wf with one workflow as a node""" - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.lzin.x) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out + + @workflow.define + def Worky(x): + wfnd = workflow.add(Wfnd(x=x), name="wfnd") + return wfnd.out + + worky = Worky(x=2) # simple graph - dotfile_s = wf.create_dotfile() + dotfile_s = plot_workflow(worky, out_dir=tmp_path) dotstr_s_lines = dotfile_s.read_text().split("\n") assert "wfnd [shape=box]" in dotstr_s_lines # nested graph - dotfile = wf.create_dotfile(type="nested") + dotfile = plot_workflow(worky, out_dir=tmp_path, type="nested") dotstr_lines = dotfile.read_text().split("\n") assert "subgraph cluster_wfnd {" in dotstr_lines assert "add2" in dotstr_lines # detailed graph - dotfile_d = wf.create_dotfile(type="detailed") + dotfile_d = plot_workflow(worky, out_dir=tmp_path, type="detailed") dotstr_d_lines = dotfile_d.read_text().split("\n") assert ( 'struct_wf [color=red, label="{WORKFLOW INPUT: | { x}}"];' in dotstr_d_lines @@ -4623,34 +4127,40 @@ def test_graph_2(tmpdir): if DOT_FLAG: name = f"graph_{sys._getframe().f_code.co_name}" - exporting_graphs(wf=wf, name=name) + exporting_graphs(worky=worky, name=name, out_dir=tmp_path) -def test_graph_2st(tmpdir): - """creating a set of graphs, wf with one workflow as a node +def test_graph_2st(tmp_path): + """creating a set of graphs, worky with one workflow as a node the inner workflow has a state, so should be blue """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wfnd = Workflow(name="wfnd", input_spec=["x"]).split("x", x=wf.lzin.x) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) + + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out + + @workflow.define + def Worky(x): + wfnd = workflow.add(Wfnd(x=x).split("x", x=x), name="wfnd") + return wfnd.out + + worky = Worky(x=[1, 2]) # simple graph - dotfile_s = wf.create_dotfile() + dotfile_s = plot_workflow(worky, out_dir=tmp_path) dotstr_s_lines = dotfile_s.read_text().split("\n") assert "wfnd [shape=box, color=blue]" in dotstr_s_lines # nested graph - dotfile_s = wf.create_dotfile(type="nested") + dotfile_s = plot_workflow(worky, out_dir=tmp_path, type="nested") dotstr_s_lines = dotfile_s.read_text().split("\n") assert "subgraph cluster_wfnd {" in dotstr_s_lines assert "color=blue" in dotstr_s_lines assert "add2" in dotstr_s_lines # detailed graph - dotfile_d = wf.create_dotfile(type="detailed") + dotfile_d = plot_workflow(worky, out_dir=tmp_path, type="detailed") dotstr_d_lines = dotfile_d.read_text().split("\n") assert ( 'struct_wf [color=red, label="{WORKFLOW INPUT: | { x}}"];' in dotstr_d_lines @@ -4659,36 +4169,41 @@ def test_graph_2st(tmpdir): if DOT_FLAG: name = f"graph_{sys._getframe().f_code.co_name}" - exporting_graphs(wf=wf, name=name) + exporting_graphs(worky=worky, name=name, out_dir=tmp_path) + +def test_graph_3(tmp_path): + """creating a set of graphs, worky with two nodes (one node is a workflow)""" -def test_graph_3(tmpdir): - """creating a set of graphs, wf with two nodes (one node is a workflow)""" - wf = Workflow(name="wf", input_spec=["x", "y"], 
cache_dir=tmpdir) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.mult.lzout.out) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) + @workflow.define + def Worky(x, y=1): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + wfnd = workflow.add(Wfnd(x=mult.out), name="wfnd") + return wfnd.out + + worky = Worky(x=2) # simple graph - dotfile_s = wf.create_dotfile() + dotfile_s = plot_workflow(worky, out_dir=tmp_path) dotstr_s_lines = dotfile_s.read_text().split("\n") assert "mult" in dotstr_s_lines assert "wfnd [shape=box]" in dotstr_s_lines assert "mult -> wfnd" in dotstr_s_lines # nested graph - dotfile_n = wf.create_dotfile(type="nested") + dotfile_n = plot_workflow(worky, out_dir=tmp_path, type="nested") dotstr_n_lines = dotfile_n.read_text().split("\n") assert "mult" in dotstr_n_lines assert "subgraph cluster_wfnd {" in dotstr_n_lines assert "add2" in dotstr_n_lines # detailed graph - dotfile_d = wf.create_dotfile(type="detailed") + dotfile_d = plot_workflow(worky, out_dir=tmp_path, type="detailed") dotstr_d_lines = dotfile_d.read_text().split("\n") assert ( 'struct_wf [color=red, label="{WORKFLOW INPUT: | { x | y}}"];' @@ -4698,39 +4213,44 @@ def test_graph_3(tmpdir): if DOT_FLAG: name = f"graph_{sys._getframe().f_code.co_name}" - exporting_graphs(wf=wf, name=name) + exporting_graphs(worky=worky, name=name, out_dir=tmp_path) -def test_graph_3st(tmpdir): - """creating a set of graphs, wf with two nodes (one node is a workflow) +def test_graph_3st(tmp_path): + """creating a set of graphs, worky with two nodes (one node is a workflow) the first node has a state and it should be passed to the second node (blue node and a wfasnd, and blue arrow from the node to the wfasnd) """ - wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(multiply(name="mult", y=wf.lzin.y).split("x", x=wf.lzin.x)) - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.mult.lzout.out) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x), name="add2") + return add2.out + + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply(y=y).split("x", x=x), name="mult") + wfnd = workflow.add(Wfnd(x=mult.out), name="wfnd") + return wfnd.out + + worky = Worky(x=[1, 2], y=2) # simple graph - dotfile_s = wf.create_dotfile() + dotfile_s = plot_workflow(worky, out_dir=tmp_path) dotstr_s_lines = dotfile_s.read_text().split("\n") assert "mult [color=blue]" in dotstr_s_lines assert "wfnd [shape=box, color=blue]" in dotstr_s_lines assert "mult -> wfnd [color=blue]" in dotstr_s_lines # nested graph - dotfile_n = wf.create_dotfile(type="nested") + dotfile_n = plot_workflow(worky, out_dir=tmp_path, type="nested") dotstr_n_lines = dotfile_n.read_text().split("\n") assert "mult [color=blue]" in dotstr_n_lines assert "subgraph cluster_wfnd {" in dotstr_n_lines assert "add2" in dotstr_n_lines # detailed graph - dotfile_d = wf.create_dotfile(type="detailed") + dotfile_d = plot_workflow(worky, out_dir=tmp_path, type="detailed") dotstr_d_lines = dotfile_d.read_text().split("\n") assert ( 'struct_wf [color=red, label="{WORKFLOW INPUT: | { x | y}}"];' @@ -4740,31 +4260,37 @@ def 
test_graph_3st(tmpdir): if DOT_FLAG: name = f"graph_{sys._getframe().f_code.co_name}" - exporting_graphs(wf=wf, name=name) + exporting_graphs(worky=worky, name=name, out_dir=tmp_path) -def test_graph_4(tmpdir): - """creating a set of graphs, wf with two nodes (one node is a workflow with two nodes +def test_graph_4(tmp_path): + """creating a set of graphs, worky with two nodes (one node is a workflow with two nodes inside). Connection from the node to the inner workflow. """ - wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.mult.lzout.out) - wfnd.add(add2(name="add2_a", x=wfnd.lzin.x)) - wfnd.add(add2(name="add2_b", x=wfnd.add2_a.lzout.out)) - wfnd.set_output([("out", wfnd.add2_b.lzout.out)]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) + + @workflow.define + def Wfnd(x): + add2_a = workflow.add(Add2(x=x), name="add2_a") + add2_b = workflow.add(Add2(x=add2_a.out), name="add2_b") + return add2_b.out + + @workflow.define + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + wfnd = workflow.add(Wfnd(x=mult.out), name="wfnd") + return wfnd.out + + worky = Worky(x=2, y=3) # simple graph - dotfile_s = wf.create_dotfile() + dotfile_s = plot_workflow(worky, out_dir=tmp_path) dotstr_s_lines = dotfile_s.read_text().split("\n") assert "mult" in dotstr_s_lines assert "wfnd [shape=box]" in dotstr_s_lines assert "mult -> wfnd" in dotstr_s_lines # nested graph - dotfile_n = wf.create_dotfile(type="nested") + dotfile_n = plot_workflow(worky, out_dir=tmp_path, type="nested") dotstr_n_lines = dotfile_n.read_text().split("\n") for el in ["mult", "add2_a", "add2_b"]: assert el in dotstr_n_lines @@ -4773,7 +4299,7 @@ def test_graph_4(tmpdir): assert "mult -> add2_a [lhead=cluster_wfnd]" # detailed graph - dotfile_d = wf.create_dotfile(type="detailed") + dotfile_d = plot_workflow(worky, out_dir=tmp_path, type="detailed") dotstr_d_lines = dotfile_d.read_text().split("\n") assert ( 'struct_wf [color=red, label="{WORKFLOW INPUT: | { x | y}}"];' @@ -4783,31 +4309,37 @@ def test_graph_4(tmpdir): if DOT_FLAG: name = f"graph_{sys._getframe().f_code.co_name}" - exporting_graphs(wf=wf, name=name) + exporting_graphs(worky=worky, name=name, out_dir=tmp_path) -def test_graph_5(tmpdir): - """creating a set of graphs, wf with two nodes (one node is a workflow with two nodes +def test_graph_5(tmp_path): + """creating a set of graphs, worky with two nodes (one node is a workflow with two nodes inside). Connection from the inner workflow to the node. 
""" - wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.lzin.x) - wfnd.add(add2(name="add2_a", x=wfnd.lzin.x)) - wfnd.add(add2(name="add2_b", x=wfnd.add2_a.lzout.out)) - wfnd.set_output([("out", wfnd.add2_b.lzout.out)]) - wf.add(wfnd) - wf.add(multiply(name="mult", x=wf.wfnd.lzout.out, y=wf.lzin.y)) - wf.set_output([("out", wf.mult.lzout.out)]) + + @workflow.define + def Wfnd(x): + add2_a = workflow.add(Add2(x=x), name="add2_a") + add2_b = workflow.add(Add2(x=add2_a.out), name="add2_b") + return add2_b.out + + @workflow.define + def Worky(x, y): + wfnd = workflow.add(Wfnd(x=x), name="wfnd") + mult = workflow.add(Multiply(x=wfnd.out, y=y), name="mult") + return mult.out + + worky = Worky(x=2, y=3) # simple graph - dotfile_s = wf.create_dotfile() + dotfile_s = plot_workflow(worky, out_dir=tmp_path) dotstr_s_lines = dotfile_s.read_text().split("\n") assert "mult" in dotstr_s_lines assert "wfnd [shape=box]" in dotstr_s_lines assert "wfnd -> mult" in dotstr_s_lines # nested graph - dotfile_n = wf.create_dotfile(type="nested") + dotfile_n = plot_workflow(worky, out_dir=tmp_path, type="nested") dotstr_n_lines = dotfile_n.read_text().split("\n") for el in ["mult", "add2_a", "add2_b"]: assert el in dotstr_n_lines @@ -4816,7 +4348,7 @@ def test_graph_5(tmpdir): assert "add2_b -> mult [ltail=cluster_wfnd]" # detailed graph - dotfile_d = wf.create_dotfile(type="detailed") + dotfile_d = plot_workflow(worky, out_dir=tmp_path, type="detailed") dotstr_d_lines = dotfile_d.read_text().split("\n") assert ( 'struct_wf [color=red, label="{WORKFLOW INPUT: | { x | y}}"];' @@ -4826,109 +4358,105 @@ def test_graph_5(tmpdir): if DOT_FLAG: name = f"graph_{sys._getframe().f_code.co_name}" - exporting_graphs(wf=wf, name=name) + exporting_graphs(worky=worky, name=name, out_dir=tmp_path) @pytest.mark.timeout(20) -def test_duplicate_input_on_split_wf(tmpdir): +def test_duplicate_input_on_split_wf(tmp_path): """checking if the workflow gets stuck if it has to run two tasks with equal checksum; This can occur when splitting on a list containing duplicate values. """ text = ["test"] * 2 - @mark.task + @python.define def printer(a): return a - wf = Workflow(name="wf", input_spec=["text"], cache_dir=tmpdir) - wf.split(("text"), text=text) - - wf.add(printer(name="printer1", a=wf.lzin.text)) + @workflow.define + def Worky(text): + printer1 = workflow.add(printer(a=text)) + return printer1.out # out1 - wf.set_output([("out1", wf.printer1.lzout.out)]) + worky = Worky().split(text=text) - with Submitter(plugin="cf", n_procs=6) as sub: - sub(wf) + outputs = worky(worker="cf", n_procs=6) - res = wf.result() - - assert res[0].output.out1 == "test" and res[1].output.out1 == "test" + assert outputs.out1[0] == "test" and outputs.out1[0] == "test" @pytest.mark.timeout(40) -def test_inner_outer_wf_duplicate(tmpdir): +def test_inner_outer_wf_duplicate(tmp_path): """checking if the execution gets stuck if there is an inner and outer workflows that run two nodes with the exact same inputs. 
""" task_list = ["First", "Second"] start_list = [3, 4] - @mark.task - def one_arg(start_number): + @python.define + def OneArg(start_number): for k in range(10): start_number += 1 return start_number - @mark.task - def one_arg_inner(start_number): + @python.define + def OneArgInner(start_number): for k in range(10): start_number += 1 return start_number - # Outer workflow - test_outer = Workflow( - name="test_outer", - input_spec=["start_number", "task_name", "dummy"], - cache_dir=tmpdir, - dummy=1, - ) - # Splitting on both arguments - test_outer.split( - ["start_number", "task_name"], start_number=start_list, task_name=task_list - ) + # Inner Worky + @workflow.define(outputs=["res"]) + def InnerWf(start_number1): + inner_level1 = workflow.add(OneArgInner(start_number=start_number1)) + return inner_level1.out - # Inner Workflow - test_inner = Workflow(name="test_inner", input_spec=["start_number1"]) - test_inner.add( - one_arg_inner(name="Ilevel1", start_number=test_inner.lzin.start_number1) - ) - test_inner.set_output([("res", test_inner.Ilevel1.lzout.out)]) + # Outer workflow has two nodes plus the inner workflow - # Outer workflow has two nodes plus the inner workflow - test_outer.add(one_arg(name="level1", start_number=test_outer.lzin.start_number)) - test_outer.add(test_inner) - test_inner.inputs.start_number1 = test_outer.level1.lzout.out + # Outer workflow + @workflow.define(outputs=["res2"]) + def OuterWf(start_number, task_name, dummy): + level1 = workflow.add(OneArg(start_number=start_number)) + inner = workflow.add(InnerWf(start_number1=level1.out)) + return inner.res - test_outer.set_output([("res2", test_outer.test_inner.lzout.res)]) + test_outer = OuterWf(dummy=1).split( + ["start_number", "task_name"], start_number=start_list, task_name=task_list + ) - with Submitter(plugin="cf") as sub: - sub(test_outer) + with Submitter(worker="cf") as sub: + res = sub(test_outer) - res = test_outer.result() - assert res[0].output.res2 == 23 and res[1].output.res2 == 23 + assert res.outputs.res2[0] == 23 and res.outputs.res2[1] == 23 -def test_rerun_errored(tmpdir, capfd): +def test_rerun_errored(tmp_path, capfd): """Test rerunning a workflow containing errors. 
Only the errored tasks and workflow should be rerun""" - @mark.task - def pass_odds(x): + class EvenException(Exception): + pass + + @python.define + def PassOdds(x): if x % 2 == 0: print(f"x%2 = {x % 2} (error)\n") - raise Exception("even error") + raise EvenException("even error") else: print(f"x%2 = {x % 2}\n") return x - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(pass_odds(name="pass_odds").split("x", x=[1, 2, 3, 4, 5])) - wf.set_output([("out", wf.pass_odds.lzout.out)]) + @workflow.define + def Worky(x): + pass_odds = workflow.add(PassOdds().split("x", x=x)) + return pass_odds.out - with pytest.raises(Exception): - wf() - with pytest.raises(Exception): - wf() + worky = Worky(x=[1, 2, 3, 4, 5]) + + with pytest.raises(RuntimeError): + # Must be cf to get the error from all tasks, otherwise will only get the first error + worky(worker="cf", cache_dir=tmp_path, n_procs=5) + with pytest.raises(RuntimeError): + worky(worker="cf", cache_dir=tmp_path, n_procs=5) out, err = capfd.readouterr() stdout_lines = out.splitlines() @@ -4948,84 +4476,92 @@ def pass_odds(x): assert errors_found == 4 -def test_wf_state_arrays(): - wf = Workflow( - name="test", - input_spec={"x": ty.List[int], "y": int}, - output_spec={"alpha": int, "beta": ty.List[int]}, - ) +def test_wf_state_arrays(tmp_path, plugin): + @workflow.define(outputs={"alpha": int, "beta": ty.List[int]}) + def Worky(x: ty.List[int], y: int): - wf.add( # Split over workflow input "x" on "scalar" input - list_mult_sum( - in_list=wf.lzin.x, - name="A", - ).split(scalar=wf.lzin.x) - ) + A = workflow.add( # Split over workflow input "x" on "scalar" input + ListMultSum( + in_list=x, + ).split(scalar=x) + ) - wf.add( # Workflow is still split over "x", combined over "x" on out - list_mult_sum( - name="B", - scalar=wf.A.lzout.sum, - in_list=wf.A.lzout.products, - ).combine("A.scalar") - ) + B = workflow.add( # Worky is still split over "x", combined over "x" on out + ListMultSum( + scalar=A.sum, + in_list=A.products, + ).combine("A.scalar") + ) - wf.add( # Workflow " - list_mult_sum( - name="C", - scalar=wf.lzin.y, - in_list=wf.B.lzout.sum, + C = workflow.add( # Worky " + ListMultSum( + scalar=y, + in_list=B.sum, + ) ) - ) - wf.add( # Workflow is split again, this time over C.products - list_mult_sum( - name="D", - in_list=wf.lzin.x, + D = workflow.add( # Worky is split again, this time over C.products + ListMultSum( + in_list=x, + ) + .split(scalar=C.products) + .combine("scalar") ) - .split(scalar=wf.C.lzout.products) - .combine("scalar") - ) - wf.add( # Workflow is finally combined again into a single node - list_mult_sum(name="E", scalar=wf.lzin.y, in_list=wf.D.lzout.sum) - ) + E = workflow.add( # Worky is finally combined again into a single node + ListMultSum(scalar=y, in_list=D.sum) + ) - wf.set_output([("alpha", wf.E.lzout.sum), ("beta", wf.E.lzout.products)]) + return E.sum, E.products - results = wf(x=[1, 2, 3, 4], y=10) - assert results.output.alpha == 3000000 - assert results.output.beta == [100000, 400000, 900000, 1600000] + worky = Worky(x=[1, 2, 3, 4], y=10) + outputs = worky(cache_dir=tmp_path, plugin=plugin) + assert outputs.alpha == 3000000 + assert outputs.beta == [100000, 400000, 900000, 1600000] -def test_wf_input_output_typing(): - wf = Workflow( - name="test", - input_spec={"x": int, "y": ty.List[int]}, - output_spec={"alpha": int, "beta": ty.List[int]}, - ) - with pytest.raises(TypeError) as exc_info: - list_mult_sum( - scalar=wf.lzin.y, - in_list=wf.lzin.y, +def test_wf_input_typing_fail(): + + 
@workflow.define(outputs={"alpha": int, "beta": ty.List[int]})
+    def MismatchInputWf(x: int, y: int):
+        ListMultSum(
+            scalar=y,
+            in_list=y,
             name="A",
         )
-    exc_info_matches(exc_info, "Cannot coerce  into ")
 
-    wf.add(  # Split over workflow input "x" on "scalar" input
-        list_mult_sum(
-            scalar=wf.lzin.x,
-            in_list=wf.lzin.y,
-            name="A",
+    with pytest.raises(TypeError, match="Incorrect type for field in 'y'"):
+        MismatchInputWf(x=1, y=[1, 2, 3])
+
+
+def test_wf_output_typing_fail():
+
+    @workflow.define(outputs={"alpha": int, "beta": ty.List[int]})
+    def MismatchOutputWf(x: int, y: ty.List[int]):
+        A = workflow.add(  # Split over workflow input "x" on "scalar" input
+            ListMultSum(
+                scalar=x,
+                in_list=y,
             )
-    )
+        )
+        return A.products, A.products
 
     with pytest.raises(TypeError, match="don't match their declared types"):
-        wf.set_output(
-            [
-                ("alpha", wf.A.lzout.products),
-            ]
+        MismatchOutputWf(x=1, y=[1, 2, 3])
+
+
+def test_wf_input_output_typing():
+    @workflow.define(outputs={"alpha": int, "beta": ty.List[int]})
+    def Worky(x: int, y: ty.List[int]):
+        A = workflow.add(  # Split over workflow input "x" on "scalar" input
+            ListMultSum(
+                scalar=x,
+                in_list=y,
+            )
         )
+        return A.sum, A.products
 
-    wf.set_output([("alpha", wf.A.lzout.sum), ("beta", wf.A.lzout.products)])
+    outputs = Worky(x=10, y=[1, 2, 3, 4])()
+    assert outputs.alpha == 100
+    assert outputs.beta == [10, 20, 30, 40]
diff --git a/pydra/engine/tests/utils.py b/pydra/engine/tests/utils.py
index 5b0858866c..2f3b973bb3 100644
--- a/pydra/engine/tests/utils.py
+++ b/pydra/engine/tests/utils.py
@@ -1,6 +1,7 @@
 # Tasks for testing
 import time
-import sys, shutil
+import sys
+import shutil
 import typing as ty
 from pathlib import Path
 import functools
@@ -8,10 +9,13 @@
 import subprocess as sp
 import pytest
 from fileformats.generic import File
-
-from ..core import Workflow
+from pydra.engine.helpers import list_fields
+from pydra.engine.specs import ShellDef
 from ..submitter import Submitter
-from ...
import mark +from pydra.design import workflow, python + +if ty.TYPE_CHECKING: + from pydra.engine.environments import Environment need_docker = pytest.mark.skipif( @@ -35,18 +39,34 @@ ) -def result_no_submitter(shell_task, plugin=None): +def get_output_names(task): + return sorted(f.name for f in list_fields(task.Outputs)) + + +def run_no_submitter( + shell_def: ShellDef, + cache_dir: Path | None = None, + plugin: str | None = None, + environment: "Environment | None" = None, +): """helper function to return result when running without submitter""" - return shell_task() + return shell_def(worker=plugin, cache_dir=cache_dir, environment=environment) -def result_submitter(shell_task, plugin): +def run_submitter( + shell_def: ShellDef, + cache_dir: Path | None = None, + plugin: str | None = None, + environment: "Environment | None" = None, +): """helper function to return result when running with submitter with specific plugin """ - with Submitter(plugin=plugin) as sub: - shell_task(submitter=sub) - return shell_task.result() + with Submitter(worker=plugin, cache_dir=cache_dir, environment=environment) as sub: + results = sub(shell_def) + if results.errored: + raise RuntimeError(f"task {shell_def} failed:\n" + "\n".join(results.errors)) + return results.outputs dot_check = sp.run(["which", "dot"], stdout=sp.PIPE, stderr=sp.PIPE) @@ -56,13 +76,13 @@ def result_submitter(shell_task, plugin): DOT_FLAG = False -@mark.task -def op_4var(a, b, c, d) -> str: +@python.define +def Op4Var(a, b, c, d) -> str: return f"{a} {b} {c} {d}" -@mark.task -def fun_addtwo(a: int) -> int: +@python.define +def FunAddTwo(a: int) -> int: import time time.sleep(1) @@ -71,8 +91,8 @@ def fun_addtwo(a: int) -> int: return a + 2 -@mark.task -def fun_addtwo_notype(a): +@python.define +def FunAddTwoNoType(a): import time time.sleep(1) @@ -81,8 +101,8 @@ def fun_addtwo_notype(a): return a + 2 -@mark.task -def fun_addtwo_with_threadcount(a: int, sgeThreads: int = 1) -> int: +@python.define +def FunAddTwoWithThreadCount(a: int, sgeThreads: int = 1) -> int: import time time.sleep(1) @@ -91,158 +111,151 @@ def fun_addtwo_with_threadcount(a: int, sgeThreads: int = 1) -> int: return a + 2 -@mark.task -def fun_addvar( - a: ty.Union[int, float], b: ty.Union[int, float] -) -> ty.Union[int, float]: +@python.define +def FunAddVar(a: ty.Union[int, float], b: ty.Union[int, float]) -> ty.Union[int, float]: return a + b -@mark.task -def fun_addvar_notype(a, b): +@python.define +def FunAddVarNoType(a, b): return a + b -@mark.task -@mark.annotate({"return": {"sum": float, "sub": float}}) -def fun_addsubvar(a: float, b: float): +@python.define(outputs={"sum": float, "sub": float}) +def FunAddSubVar(a: float, b: float): return a + b, a - b -@mark.task -def fun_addvar_none(a: int, b: ty.Optional[int]) -> int: +@python.define +def FunAddVarNone(a: int, b: ty.Optional[int]) -> int: if b is None: return a else: return a + b -@mark.task -def fun_addvar_default(a: int, b: int = 1) -> int: +@python.define +def FunAddVarDefault(a: int, b: int = 1) -> int: return a + b -@mark.task -def fun_addvar_default_notype(a, b=1): +@python.define +def FunAddVarDefaultNoType(a, b=1): return a + b -@mark.task -def fun_addvar3(a: int, b: int, c: int) -> int: +@python.define +def FunAddVar3(a: int, b: int, c: int) -> int: return a + b + c -@mark.task -def fun_addvar4(a: int, b: int, c: int, d: int) -> int: +@python.define +def FunAddVar4(a: int, b: int, c: int, d: int) -> int: return a + b + c + d -@mark.task -def moment(lst: ty.List[float], n: float) -> float: 
+@python.define +def Moment(lst: ty.List[float], n: float) -> float: return sum([i**n for i in lst]) / len(lst) -@mark.task -def fun_div(a: ty.Union[int, float], b: ty.Union[int, float]) -> float: +@python.define +def FunDiv(a: ty.Union[int, float], b: ty.Union[int, float]) -> float: return a / b -@mark.task -def multiply(x: int, y: int) -> int: +@python.define +def Multiply(x: int, y: int) -> int: return x * y -@mark.task -def multiply_list(x: list, y: int) -> list: +@python.define +def MultiplyList(x: list, y: int) -> list: return x * y -@mark.task -def multiply_mixed(x: list, y: int) -> list: +@python.define +def MultiplyMixed(x: list, y: int) -> list: return x * y -@mark.task -def add2(x: int) -> int: +@python.define +def Add2(x: int) -> int: if x == 1 or x == 12: time.sleep(1) return x + 2 -@mark.task -def raise_xeq1(x: int) -> int: +@python.define +def RaiseXeq1(x: int) -> int: if x == 1: raise Exception("x is 1, so i'm raising an exception!") return x -@mark.task -@mark.annotate({"return": {"out_add": float, "out_sub": float}}) -def add2_sub2_res(res): +@python.define(outputs={"out_add": float, "out_sub": float}) +def Add2Sub2Res(res): """function that takes entire output as an input""" return res["out"] + 2, res["out"] - 2 -@mark.task -@mark.annotate({"return": {"out_add": ty.List[float], "out_sub": ty.List[float]}}) -def add2_sub2_res_list(res): +@python.define(outputs={"out_add": ty.List[float], "out_sub": ty.List[float]}) +def Add2Sub2ResList(res): """function that takes entire output as an input""" return [r["out"] + 2 for r in res], [r["out"] - 2 for r in res] -@mark.task -def power(a: int, b: int) -> int: +@python.define +def Power(a: int, b: int) -> int: return a**b -@mark.task -def identity(x): +@python.define +def Identity(x): return x -@mark.task -def identity_2flds( - x1, x2 -) -> ty.NamedTuple("Output", [("out1", ty.Any), ("out2", ty.Any)]): +@python.define(outputs={"out1": ty.Any, "out2": ty.Any}) +def Identity2Flds(x1, x2): return x1, x2 -@mark.task -def ten(x) -> int: +@python.define +def Ten(x) -> int: return 10 -@mark.task -def add2_wait(x: int) -> int: +@python.define +def Add2Wait(x: int) -> int: time.sleep(2) return x + 2 -@mark.task -def list_output(x: int) -> ty.List[int]: +@python.define +def ListOutput(x: int) -> ty.List[int]: return [x, 2 * x, 3 * x] -@mark.task -def list_sum(x: ty.Sequence[ty.Union[int, float]]) -> ty.Union[int, float]: +@python.define +def ListSum(x: ty.Sequence[ty.Union[int, float]]) -> ty.Union[int, float]: return sum(x) -@mark.task -def fun_dict(d: dict) -> str: +@python.define +def FunDict(d: dict) -> str: kv_list = [f"{k}:{v}" for (k, v) in d.items()] return "_".join(kv_list) -@mark.task -def fun_write_file(filename: Path, text="hello") -> File: +@python.define +def FunWriteFile(filename: Path, text="hello") -> File: with open(filename, "w") as f: f.write(text) return File(filename) -@mark.task -def fun_write_file_list( +@python.define +def FunWriteFileList( filename_list: ty.List[ty.Union[str, File, Path]], text="hi" ) -> ty.List[File]: for ii, filename in enumerate(filename_list): @@ -252,8 +265,8 @@ def fun_write_file_list( return filename_list -@mark.task -def fun_write_file_list2dict( +@python.define +def FunWriteFileList2Dict( filename_list: ty.List[ty.Union[str, File, Path]], text="hi" ) -> ty.Dict[str, ty.Union[File, int]]: filename_dict = {} @@ -266,15 +279,15 @@ def fun_write_file_list2dict( return filename_dict -@mark.task -def fun_file(filename: File): +@python.define +def FunFile(filename: File): with open(filename) as 
f: txt = f.read() return txt -@mark.task -def fun_file_list(filename_list: ty.List[File]): +@python.define +def FunFileList(filename_list: ty.List[File]): txt_list = [] for filename in filename_list: with open(filename) as f: @@ -282,75 +295,36 @@ def fun_file_list(filename_list: ty.List[File]): return " ".join(txt_list) -def gen_basic_wf(name="basic-wf"): - """ - Generates `Workflow` of two tasks +@workflow.define(outputs=["out"]) +def BasicWorkflow(x): + task1 = workflow.add(FunAddTwo(a=x), name="A") + task2 = workflow.add(FunAddVar(a=task1.out, b=2), name="B") + return task2.out - Task Input - ---------- - x : int (5) - Task Output - ----------- - out : int (9) - """ - wf = Workflow(name=name, input_spec=["x"]) - wf.inputs.x = 5 - wf.add(fun_addtwo(name="task1", a=wf.lzin.x, b=0)) - wf.add(fun_addvar(name="task2", a=wf.task1.lzout.out, b=2)) - wf.set_output([("out", wf.task2.lzout.out)]) - return wf +@workflow.define(outputs=["out"]) +def BasicWorkflowWithThreadCount(x): + task1 = workflow.add(FunAddTwoWithThreadCount(a=x, sgeThreads=4)) + task2 = workflow.add(FunAddVar(a=task1.out, b=2)) + return task2.out -def gen_basic_wf_with_threadcount(name="basic-wf-with-threadcount"): - """ - Generates `Workflow` of two tasks +@workflow.define(outputs=["out1", "out2"]) +def BasicWorkflowWithThreadCountConcurrent(x): + task1_1 = workflow.add(FunAddTwoWithThreadCount(a=x, sgeThreads=4)) + task1_2 = workflow.add(FunAddTwoWithThreadCount(a=x, sgeThreads=2)) + task2 = workflow.add(FunAddVar(a=task1_1.out, b=2)) + return task2.out, task1_2.out - Task Input - ---------- - x : int (5) + # return Workflow(x=5) - Task Output - ----------- - out : int (9) - """ - wf = Workflow(name=name, input_spec=["x"]) - wf.inputs.x = 5 - wf.add(fun_addtwo_with_threadcount(name="task1", a=wf.lzin.x, sgeThreads=4)) - wf.add(fun_addvar(name="task2", a=wf.task1.lzout.out, b=2)) - wf.set_output([("out", wf.task2.lzout.out)]) - return wf - - -def gen_basic_wf_with_threadcount_concurrent(name="basic-wf-with-threadcount"): - """ - Generates `Workflow` of two tasks - - Task Input - ---------- - x : int (5) - Task Output - ----------- - out : int (9) - """ - wf = Workflow(name=name, input_spec=["x"]) - wf.inputs.x = 5 - wf.add(fun_addtwo_with_threadcount(name="task1_1", a=wf.lzin.x, sgeThreads=4)) - wf.add(fun_addtwo_with_threadcount(name="task1_2", a=wf.lzin.x, sgeThreads=2)) - wf.add(fun_addvar(name="task2", a=wf.task1_1.lzout.out, b=2)) - wf.set_output([("out1", wf.task2.lzout.out), ("out2", wf.task1_2.lzout.out)]) - return wf - - -@mark.task -@mark.annotate({"return": {"sum": int, "products": ty.List[int]}}) -def list_mult_sum(scalar: int, in_list: ty.List[int]) -> ty.Tuple[int, ty.List[int]]: +@python.define(outputs={"sum": int, "products": ty.List[int]}) +def ListMultSum(scalar: int, in_list: ty.List[int]) -> ty.Tuple[int, ty.List[int]]: products = [scalar * x for x in in_list] return functools.reduce(operator.add, products, 0), products -@mark.task -@mark.annotate({"return": {"x": str, "y": int, "z": float}}) -def foo(a: str, b: int, c: float) -> ty.Tuple[str, int, float]: +@python.define(outputs={"x": str, "y": int, "z": float}) +def Foo(a: str, b: int, c: float) -> ty.Tuple[str, int, float]: return a, b, c diff --git a/pydra/engine/workers.py b/pydra/engine/workers.py index eaa40beb0a..ab846ed427 100644 --- a/pydra/engine/workers.py +++ b/pydra/engine/workers.py @@ -3,44 +3,64 @@ import asyncio import sys import json +import abc import re +import inspect +import typing as ty from tempfile import gettempdir from pathlib 
import Path from shutil import copyfile, which - +import cloudpickle as cp import concurrent.futures as cf - -from .core import TaskBase +from .core import Task +from .specs import TaskDef from .helpers import ( get_available_cpus, read_and_display_async, save, - load_and_run, load_task, ) import logging - import random logger = logging.getLogger("pydra.worker") +if ty.TYPE_CHECKING: + from .specs import Result + +DefType = ty.TypeVar("DefType", bound="TaskDef") + -class Worker: +class Worker(metaclass=abc.ABCMeta): """A base class for execution of tasks.""" + plugin_name: str + def __init__(self, loop=None): """Initialize the worker.""" logger.debug(f"Initializing {self.__class__.__name__}") self.loop = loop - def run_el(self, interface, **kwargs): + @abc.abstractmethod + def run(self, task: "Task[DefType]", rerun: bool = False) -> "Result": """Return coroutine for task execution.""" - raise NotImplementedError + pass + + async def run_async(self, task: "Task[DefType]", rerun: bool = False) -> "Result": + if task.is_async: + return await task.run_async(rerun=rerun) + else: + return task.run(rerun=rerun) def close(self): """Close this worker.""" + @property + def is_async(self) -> bool: + """Return whether the worker is asynchronous.""" + return inspect.iscoroutinefunction(self.run) + async def fetch_finished(self, futures): """ Awaits asyncio's :class:`asyncio.Task` until one is finished. @@ -125,35 +145,28 @@ async def fetch_finished(self, futures): return pending.union(unqueued) -class SerialWorker(Worker): +class DebugWorker(Worker): """A worker to execute linearly.""" - plugin_name = "serial" + plugin_name: str = "debug" def __init__(self, **kwargs): """Initialize worker.""" logger.debug("Initialize SerialWorker") - def run_el(self, interface, rerun=False, environment=None, **kwargs): + def run( + self, + task: "Task[DefType]", + rerun: bool = False, + ) -> "Result": """Run a task.""" - return self.exec_serial(interface, rerun=rerun, environment=environment) + return task.run(rerun=rerun) def close(self): """Return whether the task is finished.""" - async def exec_serial(self, runnable, rerun=False, environment=None): - if isinstance(runnable, TaskBase): - return runnable._run(rerun, environment=environment) - else: # it could be tuple that includes pickle files with tasks and inputs - ind, task_main_pkl, _ = runnable - return load_and_run(task_main_pkl, ind, rerun, environment=environment) - async def fetch_finished(self, futures): - await asyncio.gather(*futures) - return set() - - # async def fetch_finished(self, futures): - # return await asyncio.wait(futures) + raise NotImplementedError("DebugWorker does not support async execution") class ConcurrentFuturesWorker(Worker): @@ -161,7 +174,10 @@ class ConcurrentFuturesWorker(Worker): plugin_name = "cf" - def __init__(self, n_procs=None): + n_procs: int + loop: cf.ProcessPoolExecutor + + def __init__(self, n_procs: int | None = None): """Initialize Worker.""" super().__init__() self.n_procs = get_available_cpus() if n_procs is None else n_procs @@ -170,23 +186,23 @@ def __init__(self, n_procs=None): # self.loop = asyncio.get_event_loop() logger.debug("Initialize ConcurrentFuture") - def run_el(self, runnable, rerun=False, environment=None, **kwargs): + async def run( + self, + task: "Task[DefType]", + rerun: bool = False, + ) -> "Result": """Run a task.""" assert self.loop, "No event loop available to submit tasks" - return self.exec_as_coro(runnable, rerun=rerun, environment=environment) + task_pkl = cp.dumps(task) + return await 
self.loop.run_in_executor( + self.pool, self.unpickle_and_run, task_pkl, rerun + ) - async def exec_as_coro(self, runnable, rerun=False, environment=None): - """Run a task (coroutine wrapper).""" - if isinstance(runnable, TaskBase): - res = await self.loop.run_in_executor( - self.pool, runnable._run, rerun, environment - ) - else: # it could be tuple that includes pickle files with tasks and inputs - ind, task_main_pkl, task_orig = runnable - res = await self.loop.run_in_executor( - self.pool, load_and_run, task_main_pkl, ind, rerun, environment - ) - return res + @classmethod + def unpickle_and_run(cls, task_pkl: Path, rerun: bool) -> "Result": + """Unpickle and run a task.""" + task: Task[DefType] = cp.loads(task_pkl) + return task.run(rerun=rerun) def close(self): """Finalize the internal pool of tasks.""" @@ -223,24 +239,8 @@ def __init__(self, loop=None, max_jobs=None, poll_delay=1, sbatch_args=None): self.sbatch_args = sbatch_args or "" self.error = {} - def run_el(self, runnable, rerun=False, environment=None): - """Worker submission API.""" - script_dir, batch_script = self._prepare_runscripts(runnable, rerun=rerun) - if (script_dir / script_dir.parts[1]) == gettempdir(): - logger.warning("Temporary directories may not be shared across computers") - if isinstance(runnable, TaskBase): - cache_dir = runnable.cache_dir - name = runnable.name - uid = runnable.uid - else: # runnable is a tuple (ind, pkl file, task) - cache_dir = runnable[-1].cache_dir - name = runnable[-1].name - uid = f"{runnable[-1].uid}_{runnable[0]}" - - return self._submit_job(batch_script, name=name, uid=uid, cache_dir=cache_dir) - def _prepare_runscripts(self, task, interpreter="/bin/sh", rerun=False): - if isinstance(task, TaskBase): + if isinstance(task, Task): cache_dir = task.cache_dir ind = None uid = task.uid @@ -264,7 +264,7 @@ def _prepare_runscripts(self, task, interpreter="/bin/sh", rerun=False): batchscript = script_dir / f"batchscript_{uid}.sh" python_string = ( f"""'from pydra.engine.helpers import load_and_run; """ - f"""load_and_run(task_pkl="{task_pkl}", ind={ind}, rerun={rerun}) '""" + f"""load_and_run("{task_pkl}", rerun={rerun}) '""" ) bcmd = "\n".join( ( @@ -277,13 +277,16 @@ def _prepare_runscripts(self, task, interpreter="/bin/sh", rerun=False): fp.writelines(bcmd) return script_dir, batchscript - async def _submit_job(self, batchscript, name, uid, cache_dir): - """Coroutine that submits task runscript and polls job until completion or error.""" - script_dir = cache_dir / f"{self.__class__.__name__}_scripts" / uid + async def run(self, task: "Task[DefType]", rerun: bool = False) -> "Result": + """Worker submission API.""" + script_dir, batch_script = self._prepare_runscripts(task, rerun=rerun) + if (script_dir / script_dir.parts[1]) == gettempdir(): + logger.warning("Temporary directories may not be shared across computers") + script_dir = task.cache_dir / f"{self.__class__.__name__}_scripts" / task.uid sargs = self.sbatch_args.split() jobname = re.search(r"(?<=-J )\S+|(?<=--job-name=)\S+", self.sbatch_args) if not jobname: - jobname = ".".join((name, uid)) + jobname = ".".join((task.name, task.uid)) sargs.append(f"--job-name={jobname}") output = re.search(r"(?<=-o )\S+|(?<=--output=)\S+", self.sbatch_args) if not output: @@ -295,7 +298,7 @@ async def _submit_job(self, batchscript, name, uid, cache_dir): sargs.append(f"--error={error_file}") else: error_file = None - sargs.append(str(batchscript)) + sargs.append(str(batch_script)) # TO CONSIDER: add random sleep to avoid overloading calls 
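# The ConcurrentFuturesWorker above round-trips the task through cloudpickle so it
# can be rebuilt inside a pool process. A minimal standalone sketch of that pattern,
# using plain cloudpickle and concurrent.futures only (the names `rebuild_and_call`
# and `payload` are illustrative, not pydra APIs):
import cloudpickle as cp
import concurrent.futures as cf

def rebuild_and_call(payload: bytes) -> int:
    fn, arg = cp.loads(payload)  # rebuild the pickled callable in the worker process
    return fn(arg)

if __name__ == "__main__":
    payload = cp.dumps((lambda x: x + 2, 40))  # cloudpickle can serialise the lambda
    with cf.ProcessPoolExecutor() as pool:
        print(pool.submit(rebuild_and_call, payload).result())  # prints 42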
rc, stdout, stderr = await read_and_display_async( "sbatch", *sargs, hide_display=True @@ -322,12 +325,12 @@ async def _submit_job(self, batchscript, name, uid, cache_dir): and "--no-requeue" not in self.sbatch_args ): # loading info about task with a specific uid - info_file = cache_dir / f"{uid}_info.json" + info_file = task.cache_dir / f"{task.uid}_info.json" if info_file.exists(): checksum = json.loads(info_file.read_text())["checksum"] - if (cache_dir / f"{checksum}.lock").exists(): + if (task.cache_dir / f"{checksum}.lock").exists(): # for pyt3.8 we could you missing_ok=True - (cache_dir / f"{checksum}.lock").unlink() + (task.cache_dir / f"{checksum}.lock").unlink() cmd_re = ("scontrol", "requeue", jobid) await read_and_display_async(*cmd_re, hide_display=True) else: @@ -453,40 +456,8 @@ def __init__( self.default_qsub_args = default_qsub_args self.max_mem_free = max_mem_free - def run_el(self, runnable, rerun=False): # TODO: add env - """Worker submission API.""" - ( - script_dir, - batch_script, - task_pkl, - ind, - output_dir, - task_qsub_args, - ) = self._prepare_runscripts(runnable, rerun=rerun) - if (script_dir / script_dir.parts[1]) == gettempdir(): - logger.warning("Temporary directories may not be shared across computers") - if isinstance(runnable, TaskBase): - cache_dir = runnable.cache_dir - name = runnable.name - uid = runnable.uid - else: # runnable is a tuple (ind, pkl file, task) - cache_dir = runnable[-1].cache_dir - name = runnable[-1].name - uid = f"{runnable[-1].uid}_{runnable[0]}" - - return self._submit_job( - batch_script, - name=name, - uid=uid, - cache_dir=cache_dir, - task_pkl=task_pkl, - ind=ind, - output_dir=output_dir, - task_qsub_args=task_qsub_args, - ) - def _prepare_runscripts(self, task, interpreter="/bin/sh", rerun=False): - if isinstance(task, TaskBase): + if isinstance(task, Task): cache_dir = task.cache_dir ind = None uid = task.uid @@ -556,17 +527,19 @@ async def check_for_results_files(self, jobid, threads_requested): del self.result_files_by_jobid[jobid][task] self.threads_used -= threads_requested - async def _submit_jobs( - self, - batchscript, - name, - uid, - cache_dir, - output_dir, - task_qsub_args, - interpreter="/bin/sh", - ): - # Get the number of slots requested for this task + async def run(self, task: "Task[DefType]", rerun: bool = False) -> "Result": + """Worker submission API.""" + ( + script_dir, + batch_script, + task_pkl, + ind, + output_dir, + task_qsub_args, + ) = self._prepare_runscripts(task, rerun=rerun) + if (script_dir / script_dir.parts[1]) == gettempdir(): + logger.warning("Temporary directories may not be shared across computers") + interpreter = "/bin/sh" threads_requested = self.default_threads_per_task if "smp" in task_qsub_args: smp_index = task_qsub_args.split().index("smp") @@ -608,12 +581,11 @@ async def _submit_jobs( python_string = f"""import sys; from pydra.engine.helpers import load_and_run; \ task_pkls={[task_tuple for task_tuple in tasks_to_run]}; \ task_index=int(sys.argv[1])-1; \ - load_and_run(task_pkl=task_pkls[task_index][0], \ - ind=task_pkls[task_index][1], rerun=task_pkls[task_index][2])""" + load_and_run(task_pkls[task_index][0], rerun=task_pkls[task_index][1])""" bcmd_job = "\n".join( ( f"#!{interpreter}", - f"{sys.executable} {Path(batchscript).with_suffix('.py')}" + f"{sys.executable} {Path(batch_script).with_suffix('.py')}" + " $SGE_TASK_ID", ) ) @@ -622,13 +594,15 @@ async def _submit_jobs( # Better runtime when the python contents are written to file # rather than given by cmdline arg -c - 
with Path(batchscript).with_suffix(".py").open("wt") as fp: + with Path(batch_script).with_suffix(".py").open("wt") as fp: fp.write(bcmd_py) - with batchscript.open("wt") as fp: + with batch_script.open("wt") as fp: fp.writelines(bcmd_job) - script_dir = cache_dir / f"{self.__class__.__name__}_scripts" / uid + script_dir = ( + task.cache_dir / f"{self.__class__.__name__}_scripts" / task.uid + ) script_dir.mkdir(parents=True, exist_ok=True) sargs = ["-t"] sargs.append(f"1-{len(tasks_to_run)}") @@ -637,7 +611,7 @@ async def _submit_jobs( jobname = re.search(r"(?<=-N )\S+", task_qsub_args) if not jobname: - jobname = ".".join((name, uid)) + jobname = ".".join((task.name, task.uid)) sargs.append("-N") sargs.append(jobname) output = re.search(r"(?<=-o )\S+", self.qsub_args) @@ -655,7 +629,7 @@ async def _submit_jobs( sargs.append(error_file) else: error_file = None - sargs.append(str(batchscript)) + sargs.append(str(batch_script)) await asyncio.sleep(random.uniform(0, 5)) @@ -687,7 +661,12 @@ async def _submit_jobs( exit_status = await self._verify_exit_code(jobid) if exit_status == "ERRORED": jobid = await self._rerun_job_array( - cache_dir, uid, sargs, tasks_to_run, error_file, jobid + task.cache_dir, + task.uid, + sargs, + tasks_to_run, + error_file, + jobid, ) else: for task_pkl, ind, rerun in tasks_to_run: @@ -700,17 +679,27 @@ async def _submit_jobs( exit_status = await self._verify_exit_code(jobid) if exit_status == "ERRORED": jobid = await self._rerun_job_array( - cache_dir, uid, sargs, tasks_to_run, error_file, jobid + task.cache_dir, + task.uid, + sargs, + tasks_to_run, + error_file, + jobid, ) poll_counter = 0 poll_counter += 1 await asyncio.sleep(self.poll_delay) else: - done = await self._poll_job(jobid, cache_dir) + done = await self._poll_job(jobid, task.cache_dir) if done: if done == "ERRORED": # If the SGE job was evicted, rerun it jobid = await self._rerun_job_array( - cache_dir, uid, sargs, tasks_to_run, error_file, jobid + task.cache_dir, + task.uid, + sargs, + tasks_to_run, + error_file, + jobid, ) else: self.job_completed_by_jobid[jobid] = True @@ -881,23 +870,15 @@ def __init__(self, **kwargs): self.client_args = kwargs logger.debug("Initialize Dask") - def run_el(self, runnable, rerun=False, **kwargs): - """Run a task.""" - return self.exec_dask(runnable, rerun=rerun) - - async def exec_dask(self, runnable, rerun=False): - """Run a task (coroutine wrapper).""" + async def run( + self, + task: "Task[DefType]", + rerun: bool = False, + ) -> "Result": from dask.distributed import Client async with Client(**self.client_args, asynchronous=True) as client: - if isinstance(runnable, TaskBase): - future = client.submit(runnable._run, rerun) - result = await future - else: # it could be tuple that includes pickle files with tasks and inputs - ind, task_main_pkl, task_orig = runnable - future = client.submit(load_and_run, task_main_pkl, ind, rerun) - result = await future - return result + return await client.submit(task.run, rerun) def close(self): """Finalize the internal pool of tasks.""" @@ -924,10 +905,6 @@ def __init__(self, **kwargs): logger.debug("Initialize PsijWorker") self.psij = psij - def run_el(self, interface, rerun=False, **kwargs): - """Run a task.""" - return self.exec_psij(interface, rerun=rerun) - def make_spec(self, cmd=None, arg=None): """ Create a PSI/J job specification. @@ -941,7 +918,7 @@ def make_spec(self, cmd=None, arg=None): Returns ------- - psij.JobSpec + psij.JobDef PSI/J job specification. 
""" spec = self.psij.JobSpec() @@ -956,7 +933,7 @@ def make_job(self, spec, attributes): Parameters ---------- - spec : psij.JobSpec + definition : psij.JobDef PSI/J job specification. attributes : any Job attributes. @@ -970,7 +947,11 @@ def make_job(self, spec, attributes): job.spec = spec return job - async def exec_psij(self, runnable, rerun=False): + async def run( + self, + task: "Task[DefType]", + rerun: bool = False, + ) -> "Result": """ Run a task (coroutine wrapper). @@ -989,31 +970,12 @@ async def exec_psij(self, runnable, rerun=False): jex = self.psij.JobExecutor.get_instance(self.subtype) absolute_path = Path(__file__).parent - if isinstance(runnable, TaskBase): - cache_dir = runnable.cache_dir - file_path = cache_dir / "runnable_function.pkl" - with open(file_path, "wb") as file: - pickle.dump(runnable._run, file) - func_path = absolute_path / "run_pickled.py" - spec = self.make_spec("python", [func_path, file_path]) - else: # it could be tuple that includes pickle files with tasks and inputs - cache_dir = runnable[-1].cache_dir - file_path_1 = cache_dir / "taskmain.pkl" - file_path_2 = cache_dir / "ind.pkl" - ind, task_main_pkl, task_orig = runnable - with open(file_path_1, "wb") as file: - pickle.dump(task_main_pkl, file) - with open(file_path_2, "wb") as file: - pickle.dump(ind, file) - func_path = absolute_path / "run_pickled.py" - spec = self.make_spec( - "python", - [ - func_path, - file_path_1, - file_path_2, - ], - ) + cache_dir = task.cache_dir + file_path = cache_dir / "runnable_function.pkl" + with open(file_path, "wb") as file: + pickle.dump(task.run, file) + func_path = absolute_path / "run_pickled.py" + spec = self.make_spec("python", [func_path, file_path]) if rerun: spec.arguments.append("--rerun") @@ -1032,7 +994,7 @@ async def exec_psij(self, runnable, rerun=False): f"stderr_path '{spec.stderr_path}' is not empty. Contents:\n{stderr_contents}" ) - return + return task.result() def close(self): """Finalize the internal pool of tasks.""" @@ -1056,7 +1018,7 @@ class PsijSlurmWorker(PsijWorker): WORKERS = { w.plugin_name: w for w in ( - SerialWorker, + DebugWorker, ConcurrentFuturesWorker, SlurmWorker, DaskWorker, diff --git a/pydra/mark/__init__.py b/pydra/mark/__init__.py deleted file mode 100644 index 31e4cf832e..0000000000 --- a/pydra/mark/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .functions import annotate, task - -__all__ = ("annotate", "task") diff --git a/pydra/mark/functions.py b/pydra/mark/functions.py deleted file mode 100644 index e191a61809..0000000000 --- a/pydra/mark/functions.py +++ /dev/null @@ -1,49 +0,0 @@ -""" Decorators to apply to functions used in Pydra workflows """ - -from functools import wraps - - -def annotate(annotation): - """ - Update the annotation of a function. - - Example - ------- - >>> import pydra - >>> @pydra.mark.annotate({'a': int, 'return': float}) - ... def square(a): - ... return a ** 2.0 - - """ - import inspect - - def decorate(func): - sig = inspect.signature(func) - unknown = set(annotation) - set(sig.parameters) - {"return"} - if unknown: - raise TypeError(f"Cannot annotate unknown parameters: {tuple(unknown)}") - func.__annotations__.update(annotation) - return func - - return decorate - - -def task(func): - """ - Promote a function to a :class:`~pydra.engine.task.FunctionTask`. - - Example - ------- - >>> import pydra - >>> @pydra.mark.task - ... def square(a: int) -> float: - ... 
return a ** 2.0 - - """ - from ..engine.task import FunctionTask - - @wraps(func) - def decorate(**kwargs): - return FunctionTask(func=func, **kwargs) - - return decorate diff --git a/pydra/mark/tests/test_functions.py b/pydra/mark/tests/test_functions.py deleted file mode 100644 index 4be0343f1f..0000000000 --- a/pydra/mark/tests/test_functions.py +++ /dev/null @@ -1,219 +0,0 @@ -import pytest -import random -import typing as ty - -from ..functions import task, annotate -from ...engine.task import FunctionTask - - -def test_task_equivalence(): - def add_two(a): - return a + 2 - - canonical = FunctionTask(add_two, a=3) - - decorated1 = task(add_two)(a=3) - - @task - def addtwo(a): - return a + 2 - - decorated2 = addtwo(a=3) - - assert canonical.checksum == decorated1.checksum - - c_res = canonical._run() - d1_res = decorated1._run() - d2_res = decorated2._run() - - assert c_res.output.hash == d1_res.output.hash - assert c_res.output.hash == d2_res.output.hash - - -def test_annotation_equivalence_1(): - """testing various ways of annotation: one output, only types provided""" - - def direct(a: int) -> int: - return a + 2 - - @annotate({"return": int}) - def partial(a: int): - return a + 2 - - @annotate({"a": int, "return": int}) - def indirect(a): - return a + 2 - - # checking if the annotations are equivalent - assert direct.__annotations__ == partial.__annotations__ - assert direct.__annotations__ == indirect.__annotations__ - - # Run functions to ensure behavior is unaffected - a = random.randint(0, (1 << 32) - 3) - assert direct(a) == partial(a) - assert direct(a) == indirect(a) - - # checking if the annotation is properly converted to output_spec if used in task - task_direct = task(direct)() - assert task_direct.output_spec.fields[0] == ("out", int) - - -def test_annotation_equivalence_2(): - """testing various ways of annotation: multiple outputs, using a tuple for output annot.""" - - def direct(a: int) -> (int, float): - return a + 2, a + 2.0 - - @annotate({"return": (int, float)}) - def partial(a: int): - return a + 2, a + 2.0 - - @annotate({"a": int, "return": (int, float)}) - def indirect(a): - return a + 2, a + 2.0 - - # checking if the annotations are equivalent - assert direct.__annotations__ == partial.__annotations__ - assert direct.__annotations__ == indirect.__annotations__ - - # Run functions to ensure behavior is unaffected - a = random.randint(0, (1 << 32) - 3) - assert direct(a) == partial(a) - assert direct(a) == indirect(a) - - # checking if the annotation is properly converted to output_spec if used in task - task_direct = task(direct)() - assert task_direct.output_spec.fields == [("out1", int), ("out2", float)] - - -def test_annotation_equivalence_3(): - """testing various ways of annotation: using dictionary for output annot.""" - - def direct(a: int) -> {"out1": int}: - return a + 2 - - @annotate({"return": {"out1": int}}) - def partial(a: int): - return a + 2 - - @annotate({"a": int, "return": {"out1": int}}) - def indirect(a): - return a + 2 - - # checking if the annotations are equivalent - assert direct.__annotations__ == partial.__annotations__ - assert direct.__annotations__ == indirect.__annotations__ - - # Run functions to ensure behavior is unaffected - a = random.randint(0, (1 << 32) - 3) - assert direct(a) == partial(a) - assert direct(a) == indirect(a) - - # checking if the annotation is properly converted to output_spec if used in task - task_direct = task(direct)() - assert task_direct.output_spec.fields[0] == ("out1", int) - - -def 
test_annotation_equivalence_4(): - """testing various ways of annotation: using ty.NamedTuple for the output""" - - def direct(a: int) -> ty.NamedTuple("Output", [("sum", int), ("sub", int)]): - return a + 2, a - 2 - - @annotate({"return": ty.NamedTuple("Output", [("sum", int), ("sub", int)])}) - def partial(a: int): - return a + 2, a - 2 - - @annotate( - {"a": int, "return": ty.NamedTuple("Output", [("sum", int), ("sub", int)])} - ) - def indirect(a): - return a + 2, a - 2 - - # checking if the annotations are equivalent - assert ( - direct.__annotations__["return"].__annotations__ - == partial.__annotations__["return"].__annotations__ - == indirect.__annotations__["return"].__annotations__ - ) - assert ( - direct.__annotations__["return"].__name__ - == partial.__annotations__["return"].__name__ - == indirect.__annotations__["return"].__name__ - ) - - # Run functions to ensure behavior is unaffected - a = random.randint(0, (1 << 32) - 3) - assert direct(a) == partial(a) - assert direct(a) == indirect(a) - - # checking if the annotation is properly converted to output_spec if used in task - task_direct = task(direct)() - assert task_direct.output_spec.fields == [("sum", int), ("sub", int)] - - -def test_annotation_override(): - @annotate({"a": float, "return": float}) - def annotated(a: int) -> int: - return a + 2 - - assert annotated.__annotations__ == {"a": float, "return": float} - - -def test_invalid_annotation(): - with pytest.raises(TypeError): - - @annotate({"b": int}) - def addtwo(a): - return a + 2 - - -def test_annotated_task(): - @task - def square(in_val: float): - return in_val**2 - - res = square(in_val=2.0)() - assert res.output.out == 4.0 - - -def test_return_annotated_task(): - @task - @annotate({"in_val": float, "return": {"squared": float}}) - def square(in_val): - return in_val**2 - - res = square(in_val=2.0)() - assert res.output.squared == 4.0 - - -def test_return_halfannotated_annotated_task(): - @task - @annotate({"in_val": float, "return": float}) - def square(in_val): - return in_val**2 - - res = square(in_val=2.0)() - assert res.output.out == 4.0 - - -def test_return_annotated_task_multiple_output(): - @task - @annotate({"in_val": float, "return": {"squared": float, "cubed": float}}) - def square(in_val): - return in_val**2, in_val**3 - - res = square(in_val=2.0)() - assert res.output.squared == 4.0 - assert res.output.cubed == 8.0 - - -def test_return_halfannotated_task_multiple_output(): - @task - @annotate({"in_val": float, "return": (float, float)}) - def square(in_val): - return in_val**2, in_val**3 - - res = square(in_val=2.0)() - assert res.output.out1 == 4.0 - assert res.output.out2 == 8.0 diff --git a/pydra/tasks/__init__.py b/pydra/tasks/__init__.py deleted file mode 100644 index fae53c2d92..0000000000 --- a/pydra/tasks/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -""" Pydra tasks - -The ``pydra.tasks`` namespace is reserved for collections of Tasks, to be managed and -packaged separately. -To create a task package, please fork the `pydra-tasks-template -`__. -""" - -# This call enables pydra.tasks to be used as a namespace package when installed -# in editable mode. In normal installations it has no effect. 
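# The deleted pydra.mark decorators map onto the new pydra.design API roughly as
# follows; a hedged sketch based on the conversions earlier in this patch
# (`AddSubVar` is an illustrative name, and exact runtime options may differ):
from pydra.design import python

# previously: @mark.task + @mark.annotate({"return": {"sum": float, "sub": float}})
@python.define(outputs={"sum": float, "sub": float})
def AddSubVar(a: float, b: float):
    return a + b, a - b

outputs = AddSubVar(a=3.0, b=2.0)()  # parameterise the definition, then call to execute
assert (outputs.sum, outputs.sub) == (5.0, 1.0)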
-__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/pydra/tasks/common/__init__.py b/pydra/tasks/common/__init__.py new file mode 100644 index 0000000000..0081c43cb0 --- /dev/null +++ b/pydra/tasks/common/__init__.py @@ -0,0 +1,9 @@ +import json +from fileformats.application import Json +from pydra.design import python + + +@python.define +def LoadJson(file: Json) -> dict | list: + with open(file) as f: + return json.load(f) diff --git a/pydra/tasks/testing/__init__.py b/pydra/tasks/testing/__init__.py new file mode 100644 index 0000000000..e0aa35669a --- /dev/null +++ b/pydra/tasks/testing/__init__.py @@ -0,0 +1,82 @@ +from pydra.design import python, workflow + + +@python.define +def Add(x: float, y: float) -> float: + return x + y + + +@python.define +def Divide(x: float, y: float) -> float: + return x / y + + +@python.define +def SafeDivide(x: float, y: float) -> float: + if y == 0: + return float("nan") + return x / y + + +@python.define +def Subtract(x: float, y: float) -> float: + return x - y + + +@workflow.define +def UnsafeDivisionWorkflow(a: float, b: float, denominator: float) -> float: + """Adds 'a' and 'b' together, divides by 'denominator', and then subtracts 'b' from + the output. Division by 0 is not guarded against so the workflow will fail if + the value passed to the 'denominator' parameter is 0. + + Parameters + ---------- + a : float + The first number to add. + b : float + The second number to add. + denominator : float + The number to divide the sum of 'a' and 'b' by. + + Returns + ------- + out : float + The result of subtracting 'b' from the result of dividing the sum of 'a' and + 'b' by 'denominator'. + """ + add = workflow.add(Add(x=a, y=b)) + divide = workflow.add(Divide(x=add.out, y=denominator)) + subtract = workflow.add(Subtract(x=divide.out, y=b)) + return subtract.out + + +@workflow.define +def SafeDivisionWorkflow(a: float, b: float, denominator: float) -> float: + """Adds 'a' and 'b' together, divides by 'denominator', and then subtracts 'b' from + the output. Division by 0 is guarded against by 'SafeDivide', which returns NaN, so + the workflow will not fail if the value passed to the 'denominator' parameter is 0. + + Parameters + ---------- + a : float + The first number to add. + b : float + The second number to add. + denominator : float + The number to divide the sum of 'a' and 'b' by. + + Returns + ------- + out : float + The result of subtracting 'b' from the result of dividing the sum of 'a' and + 'b' by 'denominator'. 
+ """ + add = workflow.add(Add(x=a, y=b)) + divide = workflow.add(SafeDivide(x=add.out, y=denominator)) + subtract = workflow.add(Subtract(x=divide.out, y=b)) + return subtract.out + + +@python.define +def TenToThePower(p: int) -> int: + return 10**p diff --git a/pydra/utils/__init__.py b/pydra/utils/__init__.py index cfde94dbf8..1e36208886 100644 --- a/pydra/utils/__init__.py +++ b/pydra/utils/__init__.py @@ -1 +1,6 @@ -from .misc import user_cache_dir, add_exc_note, exc_info_matches # noqa: F401 +from .misc import ( # noqa: F401 + user_cache_dir, + default_run_cache_dir, + add_exc_note, + exc_info_matches, +) diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py index 3ba3e97b44..0d013daacd 100644 --- a/pydra/utils/hash.py +++ b/pydra/utils/hash.py @@ -2,7 +2,10 @@ import sys import os +import re +import ast import struct +import inspect from datetime import datetime import typing as ty import types @@ -21,19 +24,24 @@ from filelock import SoftFileLock import attrs.exceptions from fileformats.core.fileset import FileSet, MockMixin +from fileformats.generic import FsObject +import fileformats.core.exceptions from . import user_cache_dir, add_exc_note +from .misc import in_stdlib logger = logging.getLogger("pydra") +FUNCTION_SRC_CHUNK_LEN_DEFAULT = 8192 + try: from typing import Protocol except ImportError: - from typing_extensions import Protocol # type: ignore + from typing import Protocol # type: ignore try: from typing import runtime_checkable except ImportError: - from typing_extensions import runtime_checkable # type: ignore + from typing import runtime_checkable # type: ignore try: @@ -322,10 +330,19 @@ def bytes_repr(obj: object, cache: Cache) -> Iterator[bytes]: if attrs.has(type(obj)): # Drop any attributes that aren't used in comparisons by default dct = attrs.asdict(obj, recurse=False, filter=lambda a, _: bool(a.eq)) - elif hasattr(obj, "__slots__"): + elif hasattr(obj, "__slots__") and obj.__slots__ is not None: dct = {attr: getattr(obj, attr) for attr in obj.__slots__} else: - dct = obj.__dict__ + + def is_special_or_method(n: str): + return (n.startswith("__") and n.endswith("__")) or inspect.ismethod( + getattr(obj, n) + ) + + try: + dct = {n: v for n, v in obj.__dict__.items() if not is_special_or_method(n)} + except AttributeError: + dct = {n: getattr(obj, n) for n in dir(obj) if not is_special_or_method(n)} yield from bytes_repr_mapping_contents(dct, cache) yield b"}" @@ -439,33 +456,88 @@ def bytes_repr_dict(obj: dict, cache: Cache) -> Iterator[bytes]: yield b"}" +@register_serializer +def bytes_repr_module(obj: types.ModuleType, cache: Cache) -> Iterator[bytes]: + yield b"module:(" + yield hash_single(FsObject(obj.__file__), cache=cache) + yield b")" + + @register_serializer(ty._GenericAlias) @register_serializer(ty._SpecialForm) @register_serializer(type) def bytes_repr_type(klass: type, cache: Cache) -> Iterator[bytes]: - def type_name(tp): + from pydra.engine.helpers import list_fields + + def type_location(tp: type) -> bytes: + """Return the module and name of the type in a ASCII byte string""" try: - name = tp.__name__ + type_name = tp.__name__ except AttributeError: - name = tp._name - return name + type_name = tp._name + mod_path = ".".join( + p for p in klass.__module__.split(".") if not p.startswith("_") + ) + return f"{mod_path}.{type_name}".encode() yield b"type:(" origin = ty.get_origin(klass) - if origin: - yield f"{origin.__module__}.{type_name(origin)}[".encode() - for arg in ty.get_args(klass): + args = ty.get_args(klass) + if origin and args: + yield 
b"origin:(" + yield from bytes_repr_type(origin, cache) + yield b"),args:(" + for arg in args: if isinstance( arg, list ): # sometimes (e.g. Callable) the args of a type is a list - yield b"[" + yield b"list:(" yield from (b for t in arg for b in bytes_repr_type(t, cache)) - yield b"]" + yield b")" else: yield from bytes_repr_type(arg, cache) - yield b"]" + yield b")" else: - yield f"{klass.__module__}.{type_name(klass)}".encode() + if inspect.isclass(klass) and issubclass(klass, FileSet): + try: + yield b"mime-like:(" + klass.mime_like.encode() + b")" + except fileformats.core.exceptions.FormatDefinitionError: + yield type_location(klass) + elif fields := list_fields(klass): + yield b"fields:(" + yield from bytes_repr_sequence_contents(fields, cache) + yield b")" + if hasattr(klass, "Outputs"): + yield b",outputs:(" + yield from bytes_repr_type(klass.Outputs, cache) + yield b")" + elif in_stdlib(klass): + yield type_location(klass) + else: + try: + dct = { + n: v for n, v in klass.__dict__.items() if not n.startswith("__") + } + except AttributeError: + yield type_location(klass) + else: + yield b"__dict__:(" + yield from bytes_repr_mapping_contents(dct, cache) + yield b")" + # Include annotations + try: + annotations = klass.__annotations__ + except AttributeError: + pass + else: + yield b",annotations:(" + yield from bytes_repr_mapping_contents(annotations, cache) + yield b")" + yield b",mro:(" + yield from ( + b for t in klass.mro()[1:-1] for b in bytes_repr_type(t, cache) + ) + yield b")" yield b")" @@ -519,6 +591,77 @@ def bytes_repr_set(obj: Set, cache: Cache) -> Iterator[bytes]: yield b"}" +@register_serializer +def bytes_repr_code(obj: types.CodeType, cache: Cache) -> Iterator[bytes]: + yield b"code:(" + yield from bytes_repr_sequence_contents( + ( + obj.co_argcount, + obj.co_posonlyargcount, + obj.co_kwonlyargcount, + obj.co_nlocals, + obj.co_flags, + obj.co_code, + obj.co_consts, + obj.co_names, + obj.co_varnames, + obj.co_freevars, + obj.co_name, + obj.co_cellvars, + ), + cache, + ) + yield b")" + + +@register_serializer +def bytes_repr_function(obj: types.FunctionType, cache: Cache) -> Iterator[bytes]: + """Serialize a function, attempting to use the AST of the source code if available + otherwise falling back to the byte-code of the function.""" + yield b"function:(" + if in_stdlib(obj): + yield f"{obj.__module__}.{obj.__name__}".encode() + else: + try: + src = inspect.getsource(obj) + except OSError: + # Fallback to using the bytes representation of the code object + yield from bytes_repr(obj.__code__, cache) + else: + + def dump_ast(node: ast.AST) -> bytes: + return ast.dump( + node, annotate_fields=False, include_attributes=False + ).encode() + + def strip_annotations(node: ast.AST): + """Remove annotations from function arguments.""" + if hasattr(node, "args"): + for arg in node.args.args: + arg.annotation = None + for arg in node.args.kwonlyargs: + arg.annotation = None + if node.args.vararg: + node.args.vararg.annotation = None + if node.args.kwarg: + node.args.kwarg.annotation = None + + indent = re.match(r"(\s*)", src).group(1) + if indent: + src = re.sub(f"^{indent}", "", src, flags=re.MULTILINE) + try: + func_ast = ast.parse(src).body[0] + strip_annotations(func_ast) + if hasattr(func_ast, "args"): + yield dump_ast(func_ast.args) + if hasattr(func_ast, "body"): + for stmt in func_ast.body: + yield dump_ast(stmt) + except SyntaxError: + yield src.encode() + yield b")" + + def bytes_repr_mapping_contents(mapping: Mapping, cache: Cache) -> Iterator[bytes]: """Serialize 
the contents of a mapping @@ -535,6 +678,7 @@ def bytes_repr_mapping_contents(mapping: Mapping, cache: Cache) -> Iterator[byte yield from bytes_repr(key, cache) yield b"=" yield bytes(hash_single(mapping[key], cache)) + yield b"," def bytes_repr_sequence_contents(seq: Sequence, cache: Cache) -> Iterator[bytes]: diff --git a/pydra/utils/misc.py b/pydra/utils/misc.py index 45b6a5c3ba..2575343fd7 100644 --- a/pydra/utils/misc.py +++ b/pydra/utils/misc.py @@ -1,7 +1,14 @@ from pathlib import Path import re +import ast +import inspect +import types +import sysconfig +import sys import platformdirs -from pydra._version import __version__ +import builtins +import pkgutil +from pydra.engine._version import __version__ user_cache_dir = Path( platformdirs.user_cache_dir( @@ -11,6 +18,8 @@ ) ) +default_run_cache_dir = user_cache_dir / "run-cache" + def add_exc_note(e: Exception, note: str) -> Exception: """Adds a note to an exception in a Python <3.11 compatible way @@ -43,3 +52,128 @@ def exc_info_matches(exc_info, match, regex=False): return re.match(".*" + match, msg) else: return match in msg + + +def get_undefined_symbols( + func, exclude_signature_type_hints: bool = False, ignore_decorator: bool = False +): + """ + Check the source code of a function and detect any symbols that aren't defined in its scope. + + Parameters + ---------- + func : callable + The function to analyze. + + Returns + ------- + set + A set of undefined symbols. + """ + # Get the source code of the function + source = inspect.getsource(func) + + # De-indent the source code if required + indent = re.match(r"^\s*", source).group() + source = ("\n" + source).replace("\n" + indent, "\n") + + if ignore_decorator: + # Remove the decorator from the source code, i.e. everything before the first + # unindented 'def ' keyword. 
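# The AST-based function hashing above (bytes_repr_function) dumps the argument and
# body nodes of the parsed source with annotations stripped, so layout and type
# hints do not perturb the hash. A small self-contained sketch of that
# normalisation using only the standard library (`normalised`, `f` and `g` are
# illustrative names, not part of the patch's API):
import ast
import inspect

def normalised(func) -> str:
    node = ast.parse(inspect.getsource(func)).body[0]
    for arg in node.args.args:
        arg.annotation = None  # drop argument annotations, as the serializer does
    parts = [node.args] + list(node.body)
    return "".join(
        ast.dump(p, annotate_fields=False, include_attributes=False) for p in parts
    )

def f(x: int):  return x + 1
def g(x):
    return x + 1

# identical arguments and body once annotations and layout are ignored
assert normalised(f) == normalised(g)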
+ source = re.match( + r"(.*\n)(def .*)", "\n" + source, flags=re.MULTILINE | re.DOTALL + ).group(2) + + # Parse the source code into an AST + tree = ast.parse(source) + + # Define a visitor class to traverse the AST + class SymbolVisitor(ast.NodeVisitor): + + def __init__(self): + # Initialize sets to track defined and used symbols + self.defined_symbols = set() + self.used_symbols = set() + + def visit_FunctionDef(self, node): + # Add function arguments to defined symbols + for arg in node.args.args: + self.defined_symbols.add(arg.arg) + if exclude_signature_type_hints: + # Exclude type hints from the defined symbols + type_hints_visitor = SymbolVisitor() + if node.returns: + type_hints_visitor.visit(node.returns) + for arg in node.args.args: + if arg.annotation: + type_hints_visitor.visit(arg.annotation) + type_hint_symbols = type_hints_visitor.used_symbols - self.used_symbols + self.generic_visit(node) + if exclude_signature_type_hints: + # Remove type hints from the used symbols + self.used_symbols -= type_hint_symbols + + def visit_Assign(self, node): + # Add assigned variables to defined symbols + for target in node.targets: + if isinstance(target, ast.Name): + self.defined_symbols.add(target.id) + self.generic_visit(node) + + def visit_Name(self, node): + # Add all variable names to used symbols + if isinstance(node.ctx, ast.Load): + self.used_symbols.add(node.id) + self.generic_visit(node) + + @property + def undefined_symbols(self): + return self.used_symbols - self.defined_symbols - get_builtin_type_names() + + # Create a visitor instance and visit the AST + visitor = SymbolVisitor() + visitor.visit(tree) + + return visitor.undefined_symbols + + +def get_builtin_type_names(): + """ + Get a list of built-in object type names in Python. + + Returns + ------- + set + A set of built-in object type names. 
+ """ + return set(name for name, obj in vars(builtins).items() if isinstance(obj, type)) + + +def in_stdlib(obj: types.FunctionType | type) -> str | bool: + """Check if a type is in the standard library and return the name of the module if + so.""" + module = inspect.getmodule(obj) + if module is None: + return False + if module.__name__.startswith("builtins"): + return "builtins" + if module.__name__ == "types" and obj.__name__ not in dir(types): + return False + toplevel = module.__name__.split(".")[0] + if toplevel in STDLIB_MODULES: + return toplevel + return False + + +def _stdlib_modules() -> frozenset[str]: + """List all standard library modules.""" + std_lib_modules = set(sys.builtin_module_names) + std_lib_path = sysconfig.get_path("stdlib") + std_lib_modules.update(m[1] for m in pkgutil.iter_modules([std_lib_path])) + return frozenset(std_lib_modules) + + +STDLIB_MODULES: frozenset[str] = _stdlib_modules() + +# Example usage: +# print(list_standard_library_modules()) diff --git a/pydra/utils/tests/test_hash.py b/pydra/utils/tests/test_hash.py index de065a03de..abc4f2a444 100644 --- a/pydra/utils/tests/test_hash.py +++ b/pydra/utils/tests/test_hash.py @@ -10,7 +10,7 @@ import typing as ty from fileformats.application import Zip, Json from fileformats.text import TextFile -from ..hash import ( +from pydra.utils.hash import ( Cache, bytes_repr, hash_object, @@ -50,7 +50,7 @@ def test_bytes_repr_builtins(): assert complex_repr == b"complex:" + bytes(16) # Dicts are sorted by key, and values are hashed dict_repr = join_bytes_repr({"b": "c", "a": 0}) - assert re.match(rb"dict:{str:1:a=.{16}str:1:b=.{16}}$", dict_repr) + assert re.match(rb"dict:{str:1:a=.{16},str:1:b=.{16},}$", dict_repr) # Lists and tuples concatenate hashes of their contents list_repr = join_bytes_repr([1, 2, 3]) assert re.match(rb"list:\(.{48}\)$", list_repr) @@ -75,7 +75,7 @@ def test_bytes_repr_builtins(): (1, "6dc1db8d4dcdd8def573476cbb90cce0"), (12345678901234567890, "2b5ba668c1e8ea4902361b8d81e53074"), (1.0, "29492927b2e505840235e15a5be9f79a"), - ({"b": "c", "a": 0}, "2405cd36f4e4b6318c033f32db289f7d"), + ({"b": "c", "a": 0}, "04e5c65ec2269775d3b9ccecaf10da38"), ([1, 2, 3], "2f8902ff90f63d517bd6f6e6111e15b8"), ((1, 2, 3), "054a7b31c29e7875a6f83ff1dcb4841b"), ], @@ -142,7 +142,7 @@ def __init__(self, x): self.x = x obj_repr = join_bytes_repr(MyClass(1)) - assert re.match(rb".*\.MyClass:{str:1:x=.{16}}", obj_repr) + assert re.match(rb".*\.MyClass:{str:1:x=.{16},}", obj_repr) def test_bytes_repr_slots_obj(): @@ -153,7 +153,7 @@ def __init__(self, x): self.x = x obj_repr = join_bytes_repr(MyClass(1)) - assert re.match(rb".*\.MyClass:{str:1:x=.{16}}", obj_repr) + assert re.match(rb".*\.MyClass:{str:1:x=.{16},}", obj_repr) def test_bytes_repr_attrs_slots(): @@ -162,7 +162,7 @@ class MyClass: x: int obj_repr = join_bytes_repr(MyClass(1)) - assert re.match(rb".*\.MyClass:{str:1:x=.{16}}", obj_repr) + assert re.match(rb".*\.MyClass:{str:1:x=.{16},}", obj_repr) def test_bytes_repr_attrs_no_slots(): @@ -171,7 +171,7 @@ class MyClass: x: int obj_repr = join_bytes_repr(MyClass(1)) - assert re.match(rb".*\.MyClass:{str:1:x=.{16}}", obj_repr) + assert re.match(rb".*\.MyClass:{str:1:x=.{16},}", obj_repr) def test_bytes_repr_type1(): @@ -181,31 +181,44 @@ def test_bytes_repr_type1(): def test_bytes_repr_type1a(): obj_repr = join_bytes_repr(Zip[Json]) - assert obj_repr == rb"type:(fileformats.application.archive.Json__Zip)" + assert obj_repr == rb"type:(mime-like:(application/json+zip))" def test_bytes_repr_type2(): T = 
ty.TypeVar("T") class MyClass(ty.Generic[T]): - pass + + a: int + b: str + + def method(self, f: float) -> float: + return f + 1 obj_repr = join_bytes_repr(MyClass[int]) - assert ( - obj_repr == b"type:(pydra.utils.tests.test_hash.MyClass[type:(builtins.int)])" + assert re.match( + ( + rb"type:\(origin:\(type:\(__dict__:\(str:6:method=.{16},\),annotations:\(str:1:a=.{16}," + rb"str:1:b=.{16},\),mro:\(type:\(typing.Generic\)\)\)\),args:\(type:\(builtins.int\)\)\)" + ), + obj_repr, ) def test_bytes_special_form1(): obj_repr = join_bytes_repr(ty.Union[int, float]) - assert obj_repr == b"type:(typing.Union[type:(builtins.int)type:(builtins.float)])" + assert obj_repr == ( + b"type:(origin:(type:(typing.Union)),args:(type:(builtins.int)" + b"type:(builtins.float)))" + ) @pytest.mark.skipif(condition=sys.version_info < (3, 10), reason="requires python3.10") def test_bytes_special_form1a(): obj_repr = join_bytes_repr(int | float) - assert ( - obj_repr == b"type:(types.UnionType[type:(builtins.int)type:(builtins.float)])" + assert obj_repr == ( + b"type:(origin:(type:(types.UnionType)),args:(type:(builtins.int)" + b"type:(builtins.float)))" ) @@ -216,30 +229,34 @@ def test_bytes_special_form2(): def test_bytes_special_form3(): obj_repr = join_bytes_repr(ty.Optional[Path]) - assert ( - obj_repr == b"type:(typing.Union[type:(pathlib.Path)type:(builtins.NoneType)])" + assert obj_repr == ( + b"type:(origin:(type:(typing.Union)),args:(type:(pathlib.Path)" + b"type:(builtins.NoneType)))" ) @pytest.mark.skipif(condition=sys.version_info < (3, 10), reason="requires python3.10") def test_bytes_special_form3a(): obj_repr = join_bytes_repr(Path | None) - assert ( - obj_repr - == b"type:(types.UnionType[type:(pathlib.Path)type:(builtins.NoneType)])" + assert obj_repr == ( + b"type:(origin:(type:(types.UnionType)),args:(type:(pathlib.Path)" + b"type:(builtins.NoneType)))" ) def test_bytes_special_form4(): obj_repr = join_bytes_repr(ty.Type[Path]) - assert obj_repr == b"type:(builtins.type[type:(pathlib.Path)])" + assert ( + obj_repr == b"type:(origin:(type:(builtins.type)),args:(type:(pathlib.Path)))" + ) def test_bytes_special_form5(): obj_repr = join_bytes_repr(ty.Callable[[Path, int], ty.Tuple[float, str]]) assert obj_repr == ( - b"type:(collections.abc.Callable[[type:(pathlib.Path)type:(builtins.int)]" - b"type:(builtins.tuple[type:(builtins.float)type:(builtins.str)])])" + b"type:(origin:(type:(collections.abc.Callable)),args:(list:(type:(pathlib.Path)" + b"type:(builtins.int))type:(origin:(type:(builtins.tuple))," + b"args:(type:(builtins.float)type:(builtins.str)))))" ) @@ -423,8 +440,7 @@ def __repr__(self): with pytest.raises( TypeError, match=( - "unhashable\nand therefore cannot hash `A\(\)` of type " - "`pydra.utils.tests.test_hash.A`" + r"unhashable\nand therefore cannot hash `A\(\)` of type `.*\.test_hash\.A`" ), ): hash_object(A()) diff --git a/pydra/utils/tests/test_typing.py b/pydra/utils/tests/test_typing.py index f83eedbd8c..60f92fbc3c 100644 --- a/pydra/utils/tests/test_typing.py +++ b/pydra/utils/tests/test_typing.py @@ -4,18 +4,20 @@ import typing as ty from pathlib import Path import tempfile +from unittest.mock import Mock import pytest -from pydra import mark -from ...engine.specs import File, LazyOutField, MultiInputObj -from ..typing import TypeParser -from pydra import Workflow +from pydra.design import python +from fileformats.generic import File +from pydra.engine.lazy import LazyOutField +from pydra.design import workflow +from ..typing import TypeParser, MultiInputObj from 
fileformats.application import Json, Yaml, Xml from .utils import ( - generic_func_task, + GenericFuncTask, GenericShellTask, - specific_func_task, + SpecificFuncTask, SpecificShellTask, - other_specific_func_task, + OtherSpecificFuncTask, OtherSpecificShellTask, MyFormatX, MyOtherFormatX, @@ -26,7 +28,8 @@ def lz(tp: ty.Type): """convenience method for creating a LazyField of type 'tp'""" - return LazyOutField(name="foo", field="boo", type=tp) + node = Mock() + return LazyOutField(node=node, field="boo", type=tp) PathTypes = ty.Union[str, os.PathLike] @@ -485,29 +488,25 @@ def test_type_coercion_fail2a(): def test_type_coercion_fail3(): - with pytest.raises(TypeError) as exc_info: + with pytest.raises(TypeError, match="doesn't match any of the explicit inclusion"): TypeParser(ty.Sequence, coercible=[(ty.Sequence, ty.Sequence)])( {"a": 1, "b": 2} ) - assert exc_info_matches(exc_info, "doesn't match any of the explicit inclusion") def test_type_coercion_fail4(): - with pytest.raises(TypeError) as exc_info: + with pytest.raises(TypeError, match=r"Cannot coerce \{'a': 1\} into"): TypeParser(ty.Sequence, coercible=[(ty.Any, ty.Any)])({"a": 1}) - assert exc_info_matches(exc_info, "Cannot coerce {'a': 1} into") def test_type_coercion_fail5(): - with pytest.raises(TypeError) as exc_info: + with pytest.raises(TypeError, match="as 1 is not iterable"): TypeParser(ty.List[int], coercible=[(ty.Any, ty.Any)])(1) - assert exc_info_matches(exc_info, "as 1 is not iterable") def test_type_coercion_fail6(): - with pytest.raises(TypeError) as exc_info: + with pytest.raises(TypeError, match="is not a mapping type"): TypeParser(ty.List[ty.Dict[str, str]], coercible=[(ty.Any, ty.Any)])((1, 2, 3)) - assert exc_info_matches(exc_info, "is not a mapping type") def test_type_coercion_realistic(): @@ -520,26 +519,22 @@ def test_type_coercion_realistic(): Path.touch(yet_another_file) file_list = [File(p) for p in (a_file, another_file, yet_another_file)] - @mark.task - @mark.annotate({"return": {"a": ty.List[File], "b": ty.List[str]}}) + @python.define(outputs={"a": ty.List[File], "b": ty.List[str]}) def f(x: ty.List[File], y: ty.Dict[str, ty.List[File]]): return list(itertools.chain(x, *y.values())), list(y.keys()) - task = f(x=file_list, y={"a": file_list[1:]}) + defn = f(x=file_list, y={"a": file_list[1:]}) + outputs = defn() - TypeParser(ty.List[str])(task.lzout.a) # pylint: disable=no-member + TypeParser(ty.List[str])(outputs.a) # pylint: disable=no-member with pytest.raises( TypeError, + match=r"Incorrect type for field:", ) as exc_info: - TypeParser(ty.List[int])(task.lzout.a) # pylint: disable=no-member - assert exc_info_matches( - exc_info, - match=r"Cannot coerce into ", - regex=True, - ) + TypeParser(ty.List[int])(outputs.a) # pylint: disable=no-member with pytest.raises(TypeError) as exc_info: - task.inputs.x = "bad-value" + defn.x = "bad-value" assert exc_info_matches( exc_info, match="Cannot coerce 'bad-value' into " ) @@ -682,9 +677,9 @@ def test_type_matches(): @pytest.fixture(params=["func", "shell"]) -def generic_task(request): +def GenericTask(request): if request.param == "func": - return generic_func_task + return GenericFuncTask elif request.param == "shell": return GenericShellTask else: @@ -692,9 +687,9 @@ def generic_task(request): @pytest.fixture(params=["func", "shell"]) -def specific_task(request): +def SpecificTask(request): if request.param == "func": - return specific_func_task + return SpecificFuncTask elif request.param == "shell": return SpecificShellTask else: @@ -702,125 +697,71 @@ 
def specific_task(request): @pytest.fixture(params=["func", "shell"]) -def other_specific_task(request): +def OtherSpecificTask(request): if request.param == "func": - return other_specific_func_task + return OtherSpecificFuncTask elif request.param == "shell": return OtherSpecificShellTask else: assert False -def test_typing_implicit_cast_from_super(tmp_path, generic_task, specific_task): +def test_typing_implicit_cast_from_super(tmp_path, GenericTask, SpecificTask): """Check the casting of lazy fields and whether specific file-sets can be recovered from generic `File` classes""" - wf = Workflow( - name="test", - input_spec={"in_file": MyFormatX}, - output_spec={"out_file": MyFormatX}, - ) - - wf.add( - specific_task( - in_file=wf.lzin.in_file, - name="specific1", - ) - ) - - wf.add( # Generic task - generic_task( - in_file=wf.specific1.lzout.out, - name="generic", - ) - ) - - wf.add( - specific_task( - in_file=wf.generic.lzout.out, - name="specific2", - ) - ) - - wf.set_output( - [ - ("out_file", wf.specific2.lzout.out), - ] - ) + @workflow.define(outputs=["out_file"]) + def Workflow(in_file: MyFormatX) -> MyFormatX: + specific1 = workflow.add(SpecificTask(in_file=in_file)) + generic = workflow.add(GenericTask(in_file=specific1.out)) # Generic task + specific2 = workflow.add(SpecificTask(in_file=generic.out), name="specific2") + return specific2.out in_file = MyFormatX.sample() - result = wf(in_file=in_file, plugin="serial") + outputs = Workflow(in_file=in_file)() - out_file: MyFormatX = result.output.out_file + out_file: MyFormatX = outputs.out_file assert type(out_file) is MyFormatX assert out_file.parent != in_file.parent assert type(out_file.header) is MyHeader assert out_file.header.parent != in_file.header.parent -def test_typing_cast(tmp_path, specific_task, other_specific_task): +def test_typing_cast(tmp_path, SpecificTask, OtherSpecificTask): """Check the casting of lazy fields and whether specific file-sets can be recovered from generic `File` classes""" - wf = Workflow( - name="test", - input_spec={"in_file": MyFormatX}, - output_spec={"out_file": MyFormatX}, - ) + @workflow.define(outputs=["out_file"]) + def Workflow(in_file: MyFormatX) -> MyFormatX: + entry = workflow.add(SpecificTask(in_file=in_file)) - wf.add( - specific_task( - in_file=wf.lzin.in_file, - name="entry", - ) - ) + with pytest.raises(TypeError) as exc_info: + # No cast of generic task output to MyFormatX + workflow.add(OtherSpecificTask(in_file=entry.out)) # Generic task + assert exc_info_matches(exc_info, "Cannot coerce") - with pytest.raises(TypeError) as exc_info: - # No cast of generic task output to MyFormatX - wf.add( # Generic task - other_specific_task( - in_file=wf.entry.lzout.out, - name="inner", - ) + inner = workflow.add( # Generic task + OtherSpecificTask(in_file=workflow.cast(entry.out, MyOtherFormatX)) ) - assert exc_info_matches(exc_info, "Cannot coerce") - wf.add( # Generic task - other_specific_task( - in_file=wf.entry.lzout.out.cast(MyOtherFormatX), - name="inner", - ) - ) + with pytest.raises(TypeError) as exc_info: + # No cast of generic task output to MyFormatX + workflow.add(SpecificTask(in_file=inner.out)) - with pytest.raises(TypeError) as exc_info: - # No cast of generic task output to MyFormatX - wf.add( - specific_task( - in_file=wf.inner.lzout.out, - name="exit", - ) - ) - assert exc_info_matches(exc_info, "Cannot coerce") + assert exc_info_matches(exc_info, "Cannot coerce") - wf.add( - specific_task( - in_file=wf.inner.lzout.out.cast(MyFormatX), - name="exit", + exit = 
workflow.add( + SpecificTask(in_file=workflow.cast(inner.out, MyFormatX)), name="exit" ) - ) - wf.set_output( - [ - ("out_file", wf.exit.lzout.out), - ] - ) + return exit.out in_file = MyFormatX.sample() - result = wf(in_file=in_file, plugin="serial") + outputs = Workflow(in_file=in_file)() - out_file: MyFormatX = result.output.out_file + out_file: MyFormatX = outputs.out_file assert type(out_file) is MyFormatX assert out_file.parent != in_file.parent assert type(out_file.header) is MyHeader diff --git a/pydra/utils/tests/utils.py b/pydra/utils/tests/utils.py index 3582fa9eda..b178f2df24 100644 --- a/pydra/utils/tests/utils.py +++ b/pydra/utils/tests/utils.py @@ -1,11 +1,11 @@ -from fileformats.generic import File +from fileformats.generic import File, BinaryFile from fileformats.core.mixin import WithSeparateHeader, WithMagicNumber -from pydra import mark -from pydra.engine.task import ShellCommandTask +from pydra.engine.task import ShellDef from pydra.engine import specs +from pydra.design import shell, python -class MyFormat(WithMagicNumber, File): +class MyFormat(WithMagicNumber, BinaryFile): ext = ".my" magic_number = b"MYFORMAT" @@ -18,164 +18,81 @@ class MyFormatX(WithSeparateHeader, MyFormat): header_type = MyHeader -class MyOtherFormatX(WithMagicNumber, WithSeparateHeader, File): +class MyOtherFormatX(WithMagicNumber, WithSeparateHeader, BinaryFile): magic_number = b"MYFORMAT" ext = ".my" header_type = MyHeader -@mark.task -def generic_func_task(in_file: File) -> File: +@python.define +def GenericFuncTask(in_file: File) -> File: return in_file -generic_shell_input_fields = [ - ( - "in_file", - File, - { - "help_string": "the input file", - "argstr": "", - "copyfile": "copy", - }, - ), - ( - "out", - str, - { - "help_string": "output file name", - "argstr": "", - "position": -1, - "output_file_template": "{in_file}", - }, - ), -] - -generic_shell_input_spec = specs.SpecInfo( - name="Input", fields=generic_shell_input_fields, bases=(specs.ShellSpec,) -) - -generic_shell_output_fields = [ - ( - "out", - File, - { - "help_string": "output file", - }, - ), -] -generic_shelloutput_spec = specs.SpecInfo( - name="Output", fields=generic_shell_output_fields, bases=(specs.ShellOutSpec,) -) - - -class GenericShellTask(ShellCommandTask): - input_spec = generic_shell_input_spec - output_spec = generic_shelloutput_spec +@shell.define +class GenericShellTask(specs.ShellDef["GenericShellTask.Outputs"]): + """class with customized input and executables""" + + in_file: File = shell.arg( + help="the input file", + argstr="", + copy_mode="copy", + ) + + class Outputs(specs.ShellOutputs): + out: File = shell.outarg( + help="output file name", + argstr="", + position=-1, + path_template="{in_file}", + ) + executable = "echo" -@mark.task -def specific_func_task(in_file: MyFormatX) -> MyFormatX: +@python.define +def SpecificFuncTask(in_file: MyFormatX) -> MyFormatX: return in_file -specific_shell_input_fields = [ - ( - "in_file", - MyFormatX, - { - "help_string": "the input file", - "argstr": "", - "copyfile": "copy", - "sep": " ", - }, - ), - ( - "out", - str, - { - "help_string": "output file name", - "argstr": "", - "position": -1, - "output_file_template": "{in_file}", # Pass through un-altered - }, - ), -] - -specific_shell_input_spec = specs.SpecInfo( - name="Input", fields=specific_shell_input_fields, bases=(specs.ShellSpec,) -) - -specific_shell_output_fields = [ - ( - "out", - MyFormatX, - { - "help_string": "output file", - }, - ), -] -specific_shelloutput_spec = specs.SpecInfo( - 
name="Output", fields=specific_shell_output_fields, bases=(specs.ShellOutSpec,) -) - - -class SpecificShellTask(ShellCommandTask): - input_spec = specific_shell_input_spec - output_spec = specific_shelloutput_spec +@shell.define +class SpecificShellTask(specs.ShellDef["SpecificShellTask.Outputs"]): executable = "echo" + in_file: MyFormatX = shell.arg( + help="the input file", + argstr="", + copy_mode="copy", + ) + + class Outputs(specs.ShellOutputs): + out: MyFormatX = shell.outarg( + help="output file name", + argstr="", + position=-1, + path_template="{in_file}", # Pass through un-altered + ) -@mark.task -def other_specific_func_task(in_file: MyOtherFormatX) -> MyOtherFormatX: + +@python.define +def OtherSpecificFuncTask(in_file: MyOtherFormatX) -> MyOtherFormatX: return in_file -other_specific_shell_input_fields = [ - ( - "in_file", - MyOtherFormatX, - { - "help_string": "the input file", - "argstr": "", - "copyfile": "copy", - "sep": " ", - }, - ), - ( - "out", - str, - { - "help_string": "output file name", - "argstr": "", - "position": -1, - "output_file_template": "{in_file}", # Pass through un-altered - }, - ), -] - -other_specific_shell_input_spec = specs.SpecInfo( - name="Input", fields=other_specific_shell_input_fields, bases=(specs.ShellSpec,) -) - -other_specific_shell_output_fields = [ - ( - "out", - MyOtherFormatX, - { - "help_string": "output file", - }, - ), -] -other_specific_shelloutput_spec = specs.SpecInfo( - name="Output", - fields=other_specific_shell_output_fields, - bases=(specs.ShellOutSpec,), -) - - -class OtherSpecificShellTask(ShellCommandTask): - input_spec = other_specific_shell_input_spec - output_spec = other_specific_shelloutput_spec +class OtherSpecificShellTask(ShellDef): + + in_file: MyOtherFormatX = shell.arg( + help="the input file", + argstr="", + copy_mode="copy", + ) + + class Outputs(specs.ShellOutputs): + out: MyOtherFormatX = shell.outarg( + help="output file name", + argstr="", + position=-1, + path_template="{in_file}", # Pass through un-altered + ) + executable = "echo" diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index e40f928047..031b131845 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -1,27 +1,23 @@ import itertools import inspect from pathlib import Path +import collections.abc import os from copy import copy import sys import types import typing as ty import logging -import attr -from ..engine.specs import ( - LazyField, - StateArray, - MultiInputObj, - MultiOutputObj, -) -from ..utils import add_exc_note -from fileformats import field +import attrs +from pydra.utils import add_exc_note +from fileformats import field, core, generic + try: from typing import get_origin, get_args except ImportError: # Python < 3.8 - from typing_extensions import get_origin, get_args # type: ignore + from typing import get_origin, get_args # type: ignore if sys.version_info >= (3, 10): UNION_TYPES = (ty.Union, types.UnionType) @@ -46,6 +42,45 @@ TypeOrAny = ty.Union[type, ty.Any] +# These are special types that are checked for in the construction of input/output specs +# and special converters inserted into the attrs fields. 
+ + +class MultiInputObj(list, ty.Generic[T]): + pass + + +MultiInputFile = MultiInputObj[generic.File] + + +# Since we can't create a NewType from a type union, we add a dummy type to the union +# so we can detect the MultiOutput in the input/output definition creation +class MultiOutputType: + pass + + +MultiOutputObj = ty.Union[list, object, MultiOutputType] +MultiOutputFile = ty.Union[generic.File, ty.List[generic.File], MultiOutputType] + +OUTPUT_TEMPLATE_TYPES = ( + Path, + ty.List[Path], + ty.Union[Path, bool], + ty.Union[ty.List[Path], bool], + ty.List[ty.List[Path]], +) + + +class StateArray(ty.List[T]): + """an array of values from, or to be split over in an array of nodes (see TaskBase.split()), + multiple nodes of the same task. Used in type-checking to differentiate between list + types and values for multiple nodes + """ + + def __repr__(self): + return f"{type(self).__name__}(" + ", ".join(repr(i) for i in self) + ")" + + class TypeParser(ty.Generic[T]): """A callable which can be used as a converter for attrs.fields to check whether an object or LazyField matches the specified field type, or can be @@ -86,6 +121,8 @@ class TypeParser(ty.Generic[T]): COERCIBLE_DEFAULT: ty.Tuple[ty.Tuple[type, type], ...] = ( ( (ty.Sequence, ty.Sequence), + (ty.Sequence, collections.abc.Set), + (collections.abc.Set, ty.Sequence), (ty.Mapping, ty.Mapping), (Path, os.PathLike), (str, os.PathLike), @@ -159,7 +196,7 @@ def expand_pattern(t): self.superclass_auto_cast = superclass_auto_cast self.match_any_of_union = match_any_of_union - def __call__(self, obj: ty.Any) -> ty.Union[T, LazyField[T]]: + def __call__(self, obj: ty.Any) -> T: """Attempts to coerce the object to the specified type, unless the value is a LazyField where the type of the field is just checked instead or an attrs.NOTHING where it is simply returned. @@ -180,24 +217,26 @@ def __call__(self, obj: ty.Any) -> ty.Union[T, LazyField[T]]: if the coercion is not possible, or not specified by the `coercible`/`not_coercible` parameters, then a TypeError is raised """ + from pydra.engine.helpers import is_lazy + coerced: T - if obj is attr.NOTHING: - coerced = attr.NOTHING # type: ignore[assignment] - elif isinstance(obj, LazyField): + if obj is attrs.NOTHING: + coerced = attrs.NOTHING # type: ignore[assignment] + elif is_lazy(obj): try: - self.check_type(obj.type) + self.check_type(obj._type) except TypeError as e: if self.superclass_auto_cast: try: # Check whether the type of the lazy field isn't a superclass of # the type to check against, and if so, allow it due to permissive # typing rules. 
- TypeParser(obj.type, match_any_of_union=True).check_type( + TypeParser(obj._type, match_any_of_union=True).check_type( self.tp ) except TypeError: raise TypeError( - f"Incorrect type for lazy field{self.label_str}: {obj.type!r} " + f"Incorrect type for lazy field{self.label_str}: {obj._type!r} " f"is not a subclass or superclass of {self.tp} (and will not " "be able to be coerced to one that is)" ) from e @@ -211,17 +250,25 @@ def __call__(self, obj: ty.Any) -> ty.Union[T, LazyField[T]]: ) else: raise TypeError( - f"Incorrect type for lazy field{self.label_str}: {obj.type!r} " + f"Incorrect type for lazy field{self.label_str}: {obj._type!r} " f"is not a subclass of {self.tp} (and will not be able to be " "coerced to one that is)" ) from e coerced = obj # type: ignore + if obj._type is not ty.Any: + # Used to check whether the type of the field can be changed + obj._type_checked = True elif isinstance(obj, StateArray): coerced = StateArray(self(o) for o in obj) # type: ignore[assignment] else: try: coerced = self.coerce(obj) except TypeError as e: + if obj is None: + raise TypeError( + f"Mandatory field{self.label_str} of type {self.tp} was not " + "provided a value (i.e. a value that wasn't None) " + ) from None raise TypeError( f"Incorrect type for field{self.label_str}: {obj!r} is not of type " f"{self.tp} (and cannot be coerced to it)" @@ -235,11 +282,13 @@ def coerce(self, object_: ty.Any) -> T: def expand_and_coerce(obj, pattern: ty.Union[type, tuple]): """Attempt to expand the object along the lines of the coercion pattern""" - if obj is attr.NOTHING: - return attr.NOTHING + if obj is attrs.NOTHING: + return attrs.NOTHING if not isinstance(pattern, tuple): return coerce_basic(obj, pattern) origin, pattern_args = pattern + if origin == MultiInputObj: + return coerce_multi_input(obj, pattern_args) if origin in UNION_TYPES: return coerce_union(obj, pattern_args) if origin is type: @@ -292,6 +341,21 @@ def coerce_union(obj, pattern_args): + "\n\n".join(f"{a} -> {e}" for a, e in zip(pattern_args, reasons)) ) + def coerce_multi_input(obj, pattern_args): + # Attempt to coerce the object into arg type of the MultiInputObj first, + # and if that fails, try to coerce it into a list of the arg type + try: + return coerce_sequence(list, obj, pattern_args) + except TypeError as e1: + try: + return [expand_and_coerce(obj, pattern_args[0])] + except TypeError as e2: + raise TypeError( + f"Could not coerce object ({obj!r}) to MultiInputObj[{pattern_args[0]}] " + f"either as sequence of {pattern_args[0]} ({e1}) or a single {pattern_args[0]} " + f"object to be wrapped in a list {e2}" + ) from e2 + def coerce_mapping( obj: ty.Mapping, type_: ty.Type[ty.Mapping], pattern_args: list ): @@ -368,26 +432,7 @@ def coerce_obj(obj, type_): f"Cannot coerce {obj!r} into {type_}{msg}{self.label_str}" ) from e - try: - return expand_and_coerce(object_, self.pattern) - except TypeError as e: - # Special handling for MultiInputObjects (which are annoying) - if isinstance(self.pattern, tuple) and self.pattern[0] == MultiInputObj: - # Attempt to coerce the object into arg type of the MultiInputObj first, - # and if that fails, try to coerce it into a list of the arg type - inner_type_parser = copy(self) - inner_type_parser.pattern = self.pattern[1][0] - try: - return [inner_type_parser.coerce(object_)] - except TypeError: - add_exc_note( - e, - "Also failed to coerce to the arg-type of the MultiInputObj " - f"({self.pattern[1][0]})", - ) - raise e - else: - raise e + return expand_and_coerce(object_, self.pattern) 
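To make the effect of the rewritten coercion path concrete, here is a minimal, hedged sketch of the behaviour implied by coerce_multi_input and the new None handling in __call__ above; the assertions are illustrative expectations, not tests taken from this patch.

from pydra.utils.typing import TypeParser, MultiInputObj

parser = TypeParser(MultiInputObj[int])
assert parser([1, 2, 3]) == [1, 2, 3]  # a sequence is coerced element-wise into a list
assert parser(1) == [1]                # a bare value is wrapped in a one-item list

# None passed to a non-optional field now produces the clearer error message
try:
    TypeParser(int)(None)
except TypeError as err:
    assert "Mandatory field" in str(err)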
def check_type(self, type_: ty.Type[ty.Any]): """Checks the given type to see whether it matches or is a subtype of the @@ -584,6 +629,12 @@ def check_coercible(self, source: ty.Any, target: ty.Union[type, ty.Any]): If the object cannot be coerced into the target type depending on the explicit inclusions and exclusions set in the `coercible` and `not_coercible` member attrs """ + if ( + isinstance(source, ty.Sequence) + and issubclass(target, core.FileSet) + and all(isinstance(p, os.PathLike) for p in source) + ): + return True self.check_type_coercible(type(source), target, source_repr=repr(source)) def check_type_coercible( @@ -959,8 +1010,6 @@ def strip_splits(cls, type_: ty.Type[ty.Any]) -> ty.Tuple[ty.Type, int]: ---------- type_ : ty.Type[ty.Any] the type to list the nested sequences of - only_splits : bool, optional - whether to only return nested splits, not all sequence types Returns ------- @@ -988,3 +1037,87 @@ def label_str(self): get_origin = staticmethod(get_origin) get_args = staticmethod(get_args) + + +def is_union(type_: type, args: list[type] = None) -> bool: + """Checks whether a type is a Union, in either ty.Union[T, U] or T | U form + + Parameters + ---------- + type_ : type + the type to check + args : list[type], optional + required arguments of the union to check, by default (None) any args will match + + Returns + ------- + is_union : bool + whether the type is a Union type + """ + if ty.get_origin(type_) in UNION_TYPES: + if args is not None: + return ty.get_args(type_) == args + return True + return False + + +def is_optional(type_: type) -> bool: + """Check if the type is Optional, i.e. a union containing None""" + if is_union(type_): + return any(a is type(None) or is_optional(a) for a in ty.get_args(type_)) + return False + + +def optional_type(type_: type) -> type: + """Gets the non-None args of an optional type (i.e. a union with a None arg)""" + if is_optional(type_): + non_none = [a for a in ty.get_args(type_) if a is not type(None)] + if len(non_none) == 1: + return non_none[0] + return ty.Union[tuple(non_none)] + return type_ + + +def is_multi_input(type_: type) -> bool: + """Check if the type is a MultiInputObj""" + type_ = optional_type(type_) + return MultiInputObj in (type_, ty.get_origin(type_)) + + +def is_fileset_or_union(type_: type, allow_none: bool | None = None) -> bool: + """Check if the type is a FileSet or a Union containing a FileSet + + Parameters + ---------- + type_ : type + the type to check + allow_none : bool, optional + whether to allow None as a valid type, by default None. 
If None, then None + is not allowed at the outer layer, but is allowed within a Union + + Returns + ------- + is_fileset : bool + whether the type is a FileSet or a Union containing a FileSet + """ + if type_ is None and allow_none: + return True + if is_union(type_): + return any( + is_fileset_or_union(t, allow_none=allow_none or allow_none is None) + for t in ty.get_args(type_) + ) + elif not inspect.isclass(type_): + return False + return issubclass(type_, core.FileSet) + + +def is_type(*args: ty.Any) -> bool: + """check that the value is a type or generic""" + if len(args) == 3: # attrs validator + val = args[2] + elif len(args) != 1: + raise TypeError(f"is_type() takes 1 or 3 arguments, not {args}") + else: + val = args[0] + return inspect.isclass(val) or ty.get_origin(val) diff --git a/pyproject.toml b/pyproject.toml index ba862339cd..98b65a7c94 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,36 +1,26 @@ [build-system] -requires = ["flit_scm"] -build-backend = "flit_scm:buildapi" +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" [project] name = "pydra" description = "Pydra dataflow engine" readme = "README.rst" -requires-python = ">=3.8" +requires-python = ">=3.11" dependencies = [ - "attrs >=19.1.0", + "attrs >=24.2.0", "cloudpickle >=2.0.0", "etelemetry >=0.2.2", "filelock >=3.0.0", - "fileformats >=0.8", - "importlib_resources >=5.7; python_version < '3.11'", + "fileformats >=0.15a4", "platformdirs >=2", - "typing_extensions >=4.6.3; python_version < '3.10'", - "typing_utils >=0.1.0; python_version < '3.10'", -] -license = {file = "LICENSE"} -authors = [ - {name = "Nipype developers", email = "neuroimaging@python.org"}, ] +license = { file = "LICENSE" } +authors = [{ name = "Nipype developers", email = "neuroimaging@python.org" }] maintainers = [ - {name = "Nipype developers", email = "neuroimaging@python.org"}, -] -keywords = [ - "brainweb", - "dataflow", - "neuroimaging", - "pydra", + { name = "Nipype developers", email = "neuroimaging@python.org" }, ] +keywords = ["brainweb", "dataflow", "neuroimaging", "pydra"] classifiers = [ "Development Status :: 3 - Alpha", "Environment :: Console", @@ -39,31 +29,41 @@ classifiers = [ "Operating System :: MacOS :: MacOS X", "Operating System :: Microsoft :: Windows", "Operating System :: POSIX :: Linux", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Topic :: Scientific/Engineering", ] dynamic = ["version"] [project.optional-dependencies] -psij = [ - "psij-python", -] -dask = [ - "dask", - "distributed", -] -dev = [ - "black", - "pre-commit", - "pydra[test]", -] +psij = ["psij-python"] +dask = ["dask", "distributed"] +dev = ["black", "pre-commit", "pydra[test]"] doc = [ + "fileformats-extras >= v0.15.0a4", + "fileformats-medimage >= v0.10.0a2", + "fileformats-medimage-extras >= v0.10.0a2", + "furo>=2022.2.14.1", + "ipython", + "ipykernel", + "ipywidgets", + "matplotlib", + "nbsphinx", + "nest_asyncio", + "nibabel", + "nilearn", + "numpy", + "numpydoc>=0.6.0", + "openneuro-py", "packaging", + "pandas", + "pandoc", + "pydra-mrtrix3 >=3.0.4a17", + "scipy", "sphinx ==6.2.1", + "sphinx-argparse", + "sphinx-click", "sphinx_rtd_theme", "sphinxcontrib-apidoc ~=0.3.0", "sphinxcontrib-versioning", @@ -75,7 +75,9 @@ test = [ "pytest-xdist <2.0", 
"pytest-rerunfailures", "pytest-timeout", + "pympler", "codecov", + "fileformats-extras >=0.15a4", "numpy", "pyld", "psutil", @@ -84,8 +86,27 @@ test = [ "boutiques", "pympler", ] -jupyter = [ - "nest_asyncio" +tutorial = [ + "fileformats-extras >= v0.15.0a3", + "fileformats-medimage >= v0.10.0a2", + "fileformats-medimage-extras >= v0.10.0a2", + "jupyter", + "jupyter_contrib_nbextensions", + "jupytext", + "jupyterlab", + "matplotlib", + "nbformat", + "nbval", + "nest_asyncio", + "nibabel", + "nilearn", + "numpy", + "openneuro-py", + "pandas", + "psutil", + "pydra-mrtrix3 >=3.0.4a17", + "scipy", + "sh", ] # Aliases tests = ["pydra[test]"] @@ -97,18 +118,23 @@ documentation = "https://nipype.github.io/pydra/" homepage = "https://nipype.github.io/pydra/" repository = "https://github.com/nipype/pydra.git" -[tool.flit.module] -name = "pydra" +[tool.hatch.build] +packages = ["pydra"] +exclude = ["tests"] +include = ["./pydra"] + +[tool.hatch.version] +source = "vcs" -[tool.flit.sdist] -exclude = [".gitignore"] +[tool.hatch.build.hooks.vcs] +version-file = "pydra/engine/_version.py" -[tool.setuptools_scm] -write_to = "pydra/_version.py" +[tool.hatch.metadata] +allow-direct-references = true [tool.black] -target-version = ['py37', 'py38'] -exclude = "pydra/_version.py" +target-version = ['py38'] +exclude = "pydra/engine/_version.py" [tool.codespell] -ignore-words-list = "nd,afile,inpt" +ignore-words-list = "nd,afile,inpt,fpr" diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000000..8114205dd8 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +addopts = -vv diff --git a/tutorial/README.md b/tutorial/README.md deleted file mode 100644 index 4df55ac5a0..0000000000 --- a/tutorial/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Pydra Tutorial - -Python Tutorial has been moved to a separate [GitHub repository](https://github.com/nipype/pydra-tutorial). - -The interactive tutorial is available at [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nipype/pydra-tutorial/master?filepath=notebooks)