diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index b290e09..97c8c97 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,20 +1,20 @@
 {
     "name": "nfcore",
-    "image": "nfcore/gitpod:latest",
-    "remoteUser": "gitpod",
-    "runArgs": ["--privileged"],
+    "image": "nfcore/devcontainer:latest",
 
-    // Configure tool-specific properties.
-    "customizations": {
-        // Configure properties specific to VS Code.
-        "vscode": {
-            // Set *default* container specific settings.json values on container create.
-            "settings": {
-                "python.defaultInterpreterPath": "/opt/conda/bin/python"
-            },
+    "remoteUser": "root",
+    "privileged": true,
 
-            // Add the IDs of extensions you want installed when the container is created.
-            "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"]
-        }
+    "remoteEnv": {
+        // Workspace path on the host for mounting with docker-outside-of-docker
+        "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}"
+    },
+
+    "onCreateCommand": "./.devcontainer/setup.sh",
+
+    "hostRequirements": {
+        "cpus": 4,
+        "memory": "16gb",
+        "storage": "32gb"
     }
 }
diff --git a/.devcontainer/setup.sh b/.devcontainer/setup.sh
new file mode 100755
index 0000000..2ca6343
--- /dev/null
+++ b/.devcontainer/setup.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+# Customise the terminal command prompt
+echo "export PROMPT_DIRTRIM=2" >> $HOME/.bashrc
+echo "export PS1='\[\e[3;36m\]\w ->\[\e[0m\\] '" >> $HOME/.bashrc
+export PROMPT_DIRTRIM=2
+export PS1='\[\e[3;36m\]\w ->\[\e[0m\\] '
+
+# Update Nextflow
+nextflow self-update
+
+# Update welcome message
+echo "Welcome to the nf-core/deepmutscan devcontainer!" > /usr/local/etc/vscode-dev-containers/first-run-notice.txt
diff --git a/.editorconfig b/.editorconfig
deleted file mode 100644
index 6d9b74c..0000000
--- a/.editorconfig
+++ /dev/null
@@ -1,37 +0,0 @@
-root = true
-
-[*]
-charset = utf-8
-end_of_line = lf
-insert_final_newline = true
-trim_trailing_whitespace = true
-indent_size = 4
-indent_style = space
-
-[*.{md,yml,yaml,html,css,scss,js}]
-indent_size = 2
-
-# These files are edited and tested upstream in nf-core/modules
-[/modules/nf-core/**]
-charset = unset
-end_of_line = unset
-insert_final_newline = unset
-trim_trailing_whitespace = unset
-indent_style = unset
-[/subworkflows/nf-core/**]
-charset = unset
-end_of_line = unset
-insert_final_newline = unset
-trim_trailing_whitespace = unset
-indent_style = unset
-
-[/assets/email*]
-indent_size = unset
-
-# ignore python and markdown
-[*.{py,md}]
-indent_style = unset
-
-# ignore ro-crate metadata files
-[**/ro-crate-metadata.json]
-insert_final_newline = unset
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 12dd4a0..dc13efb 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -78,7 +78,7 @@ If you wish to contribute a new step, please use the following coding standards:
 5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core pipelines schema build` tool).
 6. Add sanity checks and validation for all relevant parameters.
 7. Perform local tests to validate that the new code works as expected.
-8. If applicable, add a new test command in `.github/workflow/ci.yml`.
+8. If applicable, add a new test in the `tests` directory.
 9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://multiqc.info/) module.
 10. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`.
diff --git a/.github/actions/get-shards/action.yml b/.github/actions/get-shards/action.yml
new file mode 100644
index 0000000..3408527
--- /dev/null
+++ b/.github/actions/get-shards/action.yml
@@ -0,0 +1,69 @@
+name: "Get number of shards"
+description: "Get the number of nf-test shards for the current CI job"
+inputs:
+  max_shards:
+    description: "Maximum number of shards allowed"
+    required: true
+  paths:
+    description: "Component paths to test"
+    required: false
+  tags:
+    description: "Tags to pass as argument for nf-test --tag parameter"
+    required: false
+outputs:
+  shard:
+    description: "Array of shard numbers"
+    value: ${{ steps.shards.outputs.shard }}
+  total_shards:
+    description: "Total number of shards"
+    value: ${{ steps.shards.outputs.total_shards }}
+runs:
+  using: "composite"
+  steps:
+    - name: Install nf-test
+      uses: nf-core/setup-nf-test@v1
+      with:
+        version: ${{ env.NFT_VER }}
+    - name: Get number of shards
+      id: shards
+      shell: bash
+      run: |
+        # Run nf-test with dynamic parameter
+        nftest_output=$(nf-test test \
+          --profile +docker \
+          $(if [ -n "${{ inputs.tags }}" ]; then echo "--tag ${{ inputs.tags }}"; fi) \
+          --dry-run \
+          --ci \
+          --changed-since HEAD^) || {
+          echo "nf-test command failed with exit code $?"
+          echo "Full output: $nftest_output"
+          exit 1
+        }
+        echo "nf-test dry-run output: $nftest_output"
+
+        # Default values for shard and total_shards
+        shard="[]"
+        total_shards=0
+
+        # Check if there are related tests
+        if echo "$nftest_output" | grep -q 'No tests to execute'; then
+          echo "No related tests found."
+        else
+          # Extract the number of related tests
+          number_of_shards=$(echo "$nftest_output" | sed -n 's|.*Executed \([0-9]*\) tests.*|\1|p')
+          if [[ -n "$number_of_shards" && "$number_of_shards" -gt 0 ]]; then
+            shards_to_run=$(( $number_of_shards < ${{ inputs.max_shards }} ? $number_of_shards : ${{ inputs.max_shards }} ))
+            shard=$(seq 1 "$shards_to_run" | jq -R . | jq -c -s .)
+            total_shards="$shards_to_run"
+          else
+            echo "Unexpected output format. Falling back to default values."
+          fi
+        fi
+
+        # Write to GitHub Actions outputs
+        echo "shard=$shard" >> $GITHUB_OUTPUT
+        echo "total_shards=$total_shards" >> $GITHUB_OUTPUT
+
+        # Debugging output
+        echo "Final shard array: $shard"
+        echo "Total number of shards: $total_shards"
diff --git a/.github/actions/nf-test/action.yml b/.github/actions/nf-test/action.yml
new file mode 100644
index 0000000..3b9724c
--- /dev/null
+++ b/.github/actions/nf-test/action.yml
@@ -0,0 +1,111 @@
+name: "nf-test Action"
+description: "Runs nf-test with common setup steps"
+inputs:
+  profile:
+    description: "Profile to use"
+    required: true
+  shard:
+    description: "Shard number for this CI job"
+    required: true
+  total_shards:
+    description: "Total number of test shards (NOT the total number of matrix jobs)"
+    required: true
+  paths:
+    description: "Test paths"
+    required: true
+  tags:
+    description: "Tags to pass as argument for nf-test --tag parameter"
+    required: false
+runs:
+  using: "composite"
+  steps:
+    - name: Setup Nextflow
+      uses: nf-core/setup-nextflow@v2
+      with:
+        version: "${{ env.NXF_VERSION }}"
+
+    - name: Set up Python
+      uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6
+      with:
+        python-version: "3.14"
+
+    - name: Install nf-test
+      uses: nf-core/setup-nf-test@v1
+      with:
+        version: "${{ env.NFT_VER }}"
+        install-pdiff: true
+
+    - name: Setup apptainer
+      if: contains(inputs.profile, 'singularity')
+      uses: eWaterCycle/setup-apptainer@main
+
+    - name: Set up Singularity
+      if: contains(inputs.profile, 'singularity')
+      shell: bash
+      run: |
+        mkdir -p $NXF_SINGULARITY_CACHEDIR
+        mkdir -p $NXF_SINGULARITY_LIBRARYDIR
+
+    - name: Conda setup
+      if: contains(inputs.profile, 'conda')
+      uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3
+      with:
+        auto-update-conda: true
+        conda-solver: libmamba
+        channels: conda-forge
+        channel-priority: strict
+        conda-remove-defaults: true
+
+    - name: Run nf-test
+      shell: bash
+      env:
+        NFT_WORKDIR: ${{ env.NFT_WORKDIR }}
+      run: |
+        nf-test test \
+          --profile=+${{ inputs.profile }} \
+          $(if [ -n "${{ inputs.tags }}" ]; then echo "--tag ${{ inputs.tags }}"; fi) \
+          --ci \
+          --changed-since HEAD^ \
+          --verbose \
+          --tap=test.tap \
+          --shard ${{ inputs.shard }}/${{ inputs.total_shards }}
+
+        # Save the absolute path of the test.tap file to the output
+        echo "tap_file_path=$(realpath test.tap)" >> $GITHUB_OUTPUT
+
+    - name: Generate test summary
+      if: always()
+      shell: bash
+      run: |
+        # Add header if it doesn't exist (using a token file to track this)
+        if [ ! -f ".summary_header" ]; then
+          echo "# 🚀 nf-test results" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "| Status | Test Name | Profile | Shard |" >> $GITHUB_STEP_SUMMARY
+          echo "|:------:|-----------|---------|-------|" >> $GITHUB_STEP_SUMMARY
+          touch .summary_header
+        fi
+
+        if [ -f test.tap ]; then
+          while IFS= read -r line; do
+            if [[ $line =~ ^ok ]]; then
+              test_name="${line#ok }"
+              # Remove the test number from the beginning
+              test_name="${test_name#* }"
+              echo "| ✅ | ${test_name} | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY
+            elif [[ $line =~ ^not\ ok ]]; then
+              test_name="${line#not ok }"
+              # Remove the test number from the beginning
+              test_name="${test_name#* }"
+              echo "| ❌ | ${test_name} | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY
+            fi
+          done < test.tap
+        else
+          echo "| ⚠️ | No test results found | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY
+        fi
+
+    - name: Clean up
+      if: always()
+      shell: bash
+      run: |
+        sudo rm -rf /home/ubuntu/tests/
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml
index e47925c..e7c08d6 100644
--- a/.github/workflows/awsfulltest.yml
+++ b/.github/workflows/awsfulltest.yml
@@ -4,44 +4,23 @@ name: nf-core AWS full size tests
 # It runs the -profile 'test_full' on AWS batch
 
 on:
-  pull_request:
-    branches:
-      - main
-      - master
   workflow_dispatch:
   pull_request_review:
     types: [submitted]
+  release:
+    types: [published]
 
 jobs:
   run-platform:
     name: Run AWS full tests
-    # run only if the PR is approved by at least 2 reviewers and against the master branch or manually triggered
-    if: github.repository == 'nf-core/deepmutscan' && github.event.review.state == 'approved' && github.event.pull_request.base.ref == 'master' || github.event_name == 'workflow_dispatch'
+    # run only if the PR is approved by at least 2 reviewers and against the master/main branch or manually triggered
+    if: github.repository == 'nf-core/deepmutscan' && github.event.review.state == 'approved' && (github.event.pull_request.base.ref == 'master' || github.event.pull_request.base.ref == 'main') || github.event_name == 'workflow_dispatch' || github.event_name == 'release'
     runs-on: ubuntu-latest
     steps:
-      - name: Get PR reviews
-        uses: octokit/request-action@v2.x
-        if: github.event_name != 'workflow_dispatch'
-        id: check_approvals
-        continue-on-error: true
-        with:
-          route: GET /repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/reviews?per_page=100
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Check for approvals
-        if: ${{ failure() && github.event_name != 'workflow_dispatch' }}
-        run: |
-          echo "No review approvals found. At least 2 approvals are required to run this action automatically."
-          exit 1
-
-      - name: Check for enough approvals (>=2)
-        id: test_variables
-        if: github.event_name != 'workflow_dispatch'
+      - name: Set revision variable
+        id: revision
         run: |
-          JSON_RESPONSE='${{ steps.check_approvals.outputs.data }}'
-          CURRENT_APPROVALS_COUNT=$(echo $JSON_RESPONSE | jq -c '[.[] | select(.state | contains("APPROVED")) ] | length')
-          test $CURRENT_APPROVALS_COUNT -ge 2 || exit 1 # At least 2 approvals are required
+          echo "revision=${{ (github.event_name == 'workflow_dispatch' || github.event_name == 'release') && github.sha || 'dev' }}" >> "$GITHUB_OUTPUT"
 
       - name: Launch workflow via Seqera Platform
         uses: seqeralabs/action-tower-launch@v2
@@ -49,21 +28,21 @@ jobs:
         # Add full size test data (but still relatively small datasets for few samples)
         # on the `test_full.config` test runs with only one set of parameters
         with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
+          workspace_id: ${{ vars.TOWER_WORKSPACE_ID }}
           access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          revision: ${{ github.sha }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/deepmutscan/work-${{ github.sha }}
+          compute_env: ${{ vars.TOWER_COMPUTE_ENV }}
+          revision: ${{ steps.revision.outputs.revision }}
+          workdir: s3://${{ vars.AWS_S3_BUCKET }}/work/deepmutscan/work-${{ steps.revision.outputs.revision }}
           parameters: |
             {
              "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}",
-             "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/deepmutscan/results-${{ github.sha }}"
+             "outdir": "s3://${{ vars.AWS_S3_BUCKET }}/deepmutscan/results-${{ steps.revision.outputs.revision }}"
            }
          profiles: test_full
 
-      - uses: actions/upload-artifact@v4
+      - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
        with:
          name: Seqera Platform debug log file
          path: |
-            seqera_platform_action_*.log
-            seqera_platform_action_*.json
+            tower_action_*.log
+            tower_action_*.json
diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml
index 0f92edd..e82b4c0 100644
--- a/.github/workflows/awstest.yml
+++ b/.github/workflows/awstest.yml
@@ -14,20 +14,20 @@ jobs:
       - name: Launch workflow via Seqera Platform
         uses: seqeralabs/action-tower-launch@v2
         with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
+          workspace_id: ${{ vars.TOWER_WORKSPACE_ID }}
           access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
+          compute_env: ${{ vars.TOWER_COMPUTE_ENV }}
           revision: ${{ github.sha }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/deepmutscan/work-${{ github.sha }}
+          workdir: s3://${{ vars.AWS_S3_BUCKET }}/work/deepmutscan/work-${{ github.sha }}
           parameters: |
             {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/deepmutscan/results-test-${{ github.sha }}"
+              "outdir": "s3://${{ vars.AWS_S3_BUCKET }}/deepmutscan/results-test-${{ github.sha }}"
             }
           profiles: test
 
-      - uses: actions/upload-artifact@v4
+      - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         with:
           name: Seqera Platform debug log file
           path: |
-            seqera_platform_action_*.log
-            seqera_platform_action_*.json
+            tower_action_*.log
+            tower_action_*.json
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
deleted file mode 100644
index c23beda..0000000
--- a/.github/workflows/ci.yml
+++ /dev/null
@@ -1,87 +0,0 @@
-name: nf-core CI
-# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors
-on:
-  push:
-    branches:
-      - dev
-  pull_request:
-  release:
-    types: [published]
-  workflow_dispatch:
-
-env:
-  NXF_ANSI_LOG: false
-  NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity
-  NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity
-
-concurrency:
-  group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
-  cancel-in-progress: true
-
-jobs:
-  test:
-    name: "Run pipeline with test data (${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }})"
-    # Only run on push if this is the nf-core dev branch (merged PRs)
-    if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/deepmutscan') }}"
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        NXF_VER:
-          - "24.04.2"
-          - "latest-everything"
-        profile:
-          - "conda"
-          - "docker"
-          - "singularity"
-        test_name:
-          - "test"
-        isMaster:
-          - ${{ github.base_ref == 'master' }}
-        # Exclude conda and singularity on dev
-        exclude:
-          - isMaster: false
-            profile: "conda"
-          - isMaster: false
-            profile: "singularity"
-    steps:
-      - name: Check out pipeline code
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
-        with:
-          fetch-depth: 0
-
-      - name: Set up Nextflow
-        uses: nf-core/setup-nextflow@v2
-        with:
-          version: "${{ matrix.NXF_VER }}"
-
-      - name: Set up Apptainer
-        if: matrix.profile == 'singularity'
-        uses: eWaterCycle/setup-apptainer@main
-
-      - name: Set up Singularity
-        if: matrix.profile == 'singularity'
-        run: |
-          mkdir -p $NXF_SINGULARITY_CACHEDIR
-          mkdir -p $NXF_SINGULARITY_LIBRARYDIR
-
-      - name: Set up Miniconda
-        if: matrix.profile == 'conda'
-        uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3
-        with:
-          miniconda-version: "latest"
-          auto-update-conda: true
-          conda-solver: libmamba
-          channels: conda-forge,bioconda
-
-      - name: Set up Conda
-        if: matrix.profile == 'conda'
-        run: |
-          echo $(realpath $CONDA)/condabin >> $GITHUB_PATH
-          echo $(realpath python) >> $GITHUB_PATH
-
-      - name: Clean up Disk space
-        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
-
-      - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}"
-        run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} --outdir ./results
diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml
index 0b6b1f2..6adb0ff 100644
--- a/.github/workflows/clean-up.yml
+++ b/.github/workflows/clean-up.yml
@@ -10,7 +10,7 @@ jobs:
     issues: write
     pull-requests: write
     steps:
-      - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9
+      - uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10
         with:
           stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days."
          stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful."
diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml
index ab06316..6d94bcb 100644
--- a/.github/workflows/download_pipeline.yml
+++ b/.github/workflows/download_pipeline.yml
@@ -12,14 +12,6 @@ on:
       required: true
       default: "dev"
   pull_request:
-    types:
-      - opened
-      - edited
-      - synchronize
-    branches:
-      - main
-      - master
-  pull_request_target:
     branches:
       - main
       - master
@@ -52,9 +44,9 @@ jobs:
       - name: Disk space cleanup
         uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
 
-      - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5
+      - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6
         with:
-          python-version: "3.12"
+          python-version: "3.14"
           architecture: "x64"
 
       - name: Setup Apptainer
@@ -65,7 +57,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install git+https://github.com/nf-core/tools.git@dev
+          pip install git+https://github.com/nf-core/tools.git
 
       - name: Make a cache directory for the container images
         run: |
@@ -120,6 +112,7 @@ jobs:
           echo "IMAGE_COUNT_AFTER=$image_count" >> "$GITHUB_OUTPUT"
 
       - name: Compare container image counts
+        id: count_comparison
         run: |
           if [ "${{ steps.count_initial.outputs.IMAGE_COUNT_INITIAL }}" -ne "${{ steps.count_afterwards.outputs.IMAGE_COUNT_AFTER }}" ]; then
             initial_count=${{ steps.count_initial.outputs.IMAGE_COUNT_INITIAL }}
@@ -132,3 +125,10 @@ jobs:
           else
             echo "The pipeline can be downloaded successfully!"
           fi
+
+      - name: Upload Nextflow logfile for debugging purposes
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
+        with:
+          name: nextflow_logfile.txt
+          path: .nextflow.log*
+          include-hidden-files: true
diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix_linting.yml
similarity index 80%
rename from .github/workflows/fix-linting.yml
rename to .github/workflows/fix_linting.yml
index 8bda64e..65b1dc7 100644
--- a/.github/workflows/fix-linting.yml
+++ b/.github/workflows/fix_linting.yml
@@ -13,13 +13,13 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       # Use the @nf-core-bot token to check out so we can push later
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
         with:
           token: ${{ secrets.nf_core_bot_auth_token }}
 
       # indication that the linting is being fixed
       - name: React on comment
-        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5
         with:
           comment-id: ${{ github.event.comment.id }}
           reactions: eyes
@@ -32,9 +32,9 @@ jobs:
           GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }}
 
       # Install and run pre-commit
-      - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5
+      - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6
         with:
-          python-version: "3.12"
+          python-version: "3.14"
 
       - name: Install pre-commit
         run: pip install pre-commit
@@ -47,7 +47,7 @@ jobs:
       # indication that the linting has finished
       - name: react if linting finished successfully
         if: steps.pre-commit.outcome == 'success'
-        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5
         with:
           comment-id: ${{ github.event.comment.id }}
           reactions: "+1"
@@ -67,21 +67,21 @@ jobs:
       - name: react if linting errors were fixed
         id: react-if-fixed
         if: steps.commit-and-push.outcome == 'success'
-        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5
         with:
           comment-id: ${{ github.event.comment.id }}
           reactions: hooray
 
       - name: react if linting errors were not fixed
         if: steps.commit-and-push.outcome == 'failure'
-        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5
         with:
           comment-id: ${{ github.event.comment.id }}
           reactions: confused
 
       - name: react if linting errors were not fixed
         if: steps.commit-and-push.outcome == 'failure'
-        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5
         with:
           issue-number: ${{ github.event.issue.number }}
           body: |
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index dbd52d5..30e6602 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -3,9 +3,6 @@ name: nf-core linting
 # It runs the `nf-core pipelines lint` and markdown lint tests to ensure
 # that the code meets the nf-core guidelines.
 on:
-  push:
-    branches:
-      - dev
   pull_request:
   release:
     types: [published]
@@ -14,12 +11,12 @@ jobs:
   pre-commit:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
 
-      - name: Set up Python 3.12
-        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5
+      - name: Set up Python 3.14
+        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6
         with:
-          python-version: "3.12"
+          python-version: "3.14"
 
       - name: Install pre-commit
         run: pip install pre-commit
@@ -31,18 +28,18 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Check out pipeline code
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
 
       - name: Install Nextflow
         uses: nf-core/setup-nextflow@v2
 
-      - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5
+      - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6
         with:
-          python-version: "3.12"
+          python-version: "3.14"
           architecture: "x64"
 
       - name: read .nf-core.yml
-        uses: pietrobolcato/action-read-yaml@1.1.0
+        uses: pietrobolcato/action-read-yaml@9f13718d61111b69f30ab4ac683e67a56d254e1d # 1.1.0
         id: read_yml
         with:
           config: ${{ github.workspace }}/.nf-core.yml
@@ -74,7 +71,7 @@ jobs:
 
       - name: Upload linting log file artifact
         if: ${{ always() }}
-        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         with:
           name: linting-logs
           path: |
diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml
index 0bed96d..e6e9bc2 100644
--- a/.github/workflows/linting_comment.yml
+++ b/.github/workflows/linting_comment.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Download lint results
-        uses: dawidd6/action-download-artifact@80620a5d27ce0ae443b965134db88467fc607b43 # v7
+        uses: dawidd6/action-download-artifact@ac66b43f0e6a346234dd65d4d0c8fbb31cb316e5 # v11
         with:
           workflow: linting.yml
           workflow_conclusion: completed
@@ -21,7 +21,7 @@
         run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT
 
       - name: Post PR comment
-        uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2
+        uses: marocchino/sticky-pull-request-comment@773744901bac0e8cbb5a0dc842800d45e9b2b405 # v2
         with:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           number: ${{ steps.pr_number.outputs.pr_number }}
diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml
new file mode 100644
index 0000000..e20bf6d
--- /dev/null
+++ b/.github/workflows/nf-test.yml
@@ -0,0 +1,144 @@
+name: Run nf-test
+on:
+  pull_request:
+    paths-ignore:
+      - "docs/**"
+      - "**/meta.yml"
+      - "**/*.md"
+      - "**/*.png"
+      - "**/*.svg"
+  release:
+    types: [published]
+  workflow_dispatch:
+
+# Cancel if a newer run is started
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+env:
+  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+  NFT_VER: "0.9.3"
+  NFT_WORKDIR: "~"
+  NXF_ANSI_LOG: false
+  NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity
+  NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity
+
+jobs:
+  nf-test-changes:
+    name: nf-test-changes
+    runs-on: # use self-hosted runners
+      - runs-on=${{ github.run_id }}-nf-test-changes
+      - runner=4cpu-linux-x64
+    outputs:
+      shard: ${{ steps.set-shards.outputs.shard }}
+      total_shards: ${{ steps.set-shards.outputs.total_shards }}
+    steps:
+      - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner
+        run: |
+          ls -la ./
+          rm -rf ./* || true
+          rm -rf ./.??* || true
+          ls -la ./
+      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        with:
+          fetch-depth: 0
+
+      - name: get number of shards
+        id: set-shards
+        uses: ./.github/actions/get-shards
+        env:
+          NFT_VER: ${{ env.NFT_VER }}
+        with:
+          max_shards: 7
+
+      - name: debug
+        run: |
+          echo ${{ steps.set-shards.outputs.shard }}
+          echo ${{ steps.set-shards.outputs.total_shards }}
+
+  nf-test:
+    name: "${{ matrix.profile }} | ${{ matrix.NXF_VER }} | ${{ matrix.shard }}/${{ needs.nf-test-changes.outputs.total_shards }}"
+    needs: [nf-test-changes]
+    if: ${{ needs.nf-test-changes.outputs.total_shards != '0' }}
+    runs-on: # use self-hosted runners
+      - runs-on=${{ github.run_id }}-nf-test
+      - runner=4cpu-linux-x64
+    strategy:
+      fail-fast: false
+      matrix:
+        shard: ${{ fromJson(needs.nf-test-changes.outputs.shard) }}
+        profile: [conda, docker, singularity]
+        isMain:
+          - ${{ github.base_ref == 'master' || github.base_ref == 'main' }}
+        # Exclude conda and singularity on dev
+        exclude:
+          - isMain: false
+            profile: "conda"
+          - isMain: false
+            profile: "singularity"
+        NXF_VER:
+          - "25.04.0"
+          - "latest-everything"
+    env:
+      NXF_ANSI_LOG: false
+      TOTAL_SHARDS: ${{ needs.nf-test-changes.outputs.total_shards }}
+
+    steps:
+      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        with:
+          fetch-depth: 0
+
+      - name: Run nf-test
+        id: run_nf_test
+        uses: ./.github/actions/nf-test
+        continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }}
+        env:
+          NFT_WORKDIR: ${{ env.NFT_WORKDIR }}
+          NXF_VERSION: ${{ matrix.NXF_VER }}
+        with:
+          profile: ${{ matrix.profile }}
+          shard: ${{ matrix.shard }}
+          total_shards: ${{ env.TOTAL_SHARDS }}
+
+      - name: Report test status
+        if: ${{ always() }}
+        run: |
+          if [[ "${{ steps.run_nf_test.outcome }}" == "failure" ]]; then
+            echo "::error::Test with ${{ matrix.NXF_VER }} failed"
+            # Add to workflow summary
+            echo "## ❌ Test failed: ${{ matrix.profile }} | ${{ matrix.NXF_VER }} | Shard ${{ matrix.shard }}/${{ env.TOTAL_SHARDS }}" >> $GITHUB_STEP_SUMMARY
+            if [[ "${{ matrix.NXF_VER }}" == "latest-everything" ]]; then
+              echo "::warning::Test with latest-everything failed but will not cause workflow failure. Please check if the error is expected or if it needs fixing."
+            fi
+            if [[ "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then
+              exit 1
+            fi
+          fi
+
+  confirm-pass:
+    needs: [nf-test]
+    if: always()
+    runs-on: # use self-hosted runners
+      - runs-on=${{ github.run_id }}-confirm-pass
+      - runner=2cpu-linux-x64
+    steps:
+      - name: One or more tests failed (excluding latest-everything)
+        if: ${{ contains(needs.*.result, 'failure') }}
+        run: exit 1
+
+      - name: One or more tests cancelled
+        if: ${{ contains(needs.*.result, 'cancelled') }}
+        run: exit 1
+
+      - name: All tests ok
+        if: ${{ contains(needs.*.result, 'success') }}
+        run: exit 0
+
+      - name: debug-print
+        if: always()
+        run: |
+          echo "::group::DEBUG: `needs` Contents"
+          echo "DEBUG: toJSON(needs) = ${{ toJSON(needs) }}"
+          echo "DEBUG: toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}"
+          echo "::endgroup::"
diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml
index 450b1d5..e64cebd 100644
--- a/.github/workflows/release-announcements.yml
+++ b/.github/workflows/release-announcements.yml
@@ -14,6 +14,11 @@ jobs:
         run: |
           echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" | sed 's/-//g' >> $GITHUB_OUTPUT
 
+      - name: get description
+        id: get_topics
+        run: |
+          echo "description=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .description')" >> $GITHUB_OUTPUT
+
       - uses: rzr/fediverse-action@master
         with:
           access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }}
@@ -23,47 +28,16 @@ jobs:
           message: |
             Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
 
+            ${{ steps.get_topics.outputs.description }}
+
             Please see the changelog: ${{ github.event.release.html_url }}
 
             ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics
 
-  send-tweet:
-    runs-on: ubuntu-latest
-
-    steps:
-      - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5
-        with:
-          python-version: "3.10"
-      - name: Install dependencies
-        run: pip install tweepy==4.14.0
-      - name: Send tweet
-        shell: python
-        run: |
-          import os
-          import tweepy
-
-          client = tweepy.Client(
-              access_token=os.getenv("TWITTER_ACCESS_TOKEN"),
-              access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"),
-              consumer_key=os.getenv("TWITTER_CONSUMER_KEY"),
-              consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"),
-          )
-          tweet = os.getenv("TWEET")
-          client.create_tweet(text=tweet)
-        env:
-          TWEET: |
-            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
-
-            Please see the changelog: ${{ github.event.release.html_url }}
-          TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }}
-          TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }}
-          TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
-          TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
-
   bsky-post:
     runs-on: ubuntu-latest
     steps:
-      - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0
+      - uses: zentered/bluesky-post-action@6461056ea355ea43b977e149f7bf76aaa572e5e8 # v0.3.0
         with:
           post: |
             Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
diff --git a/.github/workflows/template_version_comment.yml b/.github/workflows/template-version-comment.yml
similarity index 91%
rename from .github/workflows/template_version_comment.yml
rename to .github/workflows/template-version-comment.yml
index 537529b..c5988af 100644
--- a/.github/workflows/template_version_comment.yml
+++ b/.github/workflows/template-version-comment.yml
@@ -9,12 +9,12 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Check out pipeline code
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
         with:
           ref: ${{ github.event.pull_request.head.sha }}
 
       - name: Read template version from .nf-core.yml
-        uses: nichmor/minimal-read-yaml@v0.0.2
+        uses: nichmor/minimal-read-yaml@1f7205277e25e156e1f63815781db80a6d490b8f # v0.0.2
         id: read_yml
         with:
           config: ${{ github.workspace }}/.nf-core.yml
diff --git a/.gitpod.yml b/.gitpod.yml
deleted file mode 100644
index 83599f6..0000000
--- a/.gitpod.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-image: nfcore/gitpod:latest
-tasks:
-  - name: Update Nextflow and setup pre-commit
-    command: |
-      pre-commit install --install-hooks
-      nextflow self-update
-
-vscode:
-  extensions:
-    - nf-core.nf-core-extensionpack # https://github.com/nf-core/vscode-extensionpack
diff --git a/.nf-core.yml b/.nf-core.yml
index 0566ef9..3251f8f 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -1,23 +1,21 @@
-repository_type: pipeline
-
-nf_core_version: 3.1.2
-
 lint: {}
-
+nf_core_version: 3.4.1
+repository_type: pipeline
 template:
-  org: nf-core
-  name: deepmutscan
-  description: "Until now, most Deep Mutational Scanning (DMS) experiments relied
-    on variant-specific barcoded libraries for sequencing. This method enabled DMS
-    on large proteins and led to many great publications. Recently, efforts have increased
-    to make use of the classic and more simple random fragmentation-based short-read
-    sequencing (“shotgun-sequencing”). This saves time and money and due to its simpler
-    experimental design is less prone to mistakes. dmscore handles the essential computational
-    steps, processing the raw FASTQ files and generating a count table of variants.
-    Along the way, it provides multiple QC metrics, enabling users to quickly evaluate
-    the success of their experimental setup."
   author: Benjamin Wehnert & Max Stammnitz
-  version: 1.0.0
-  force: true
-  outdir: .
+  description: "Until now, most Deep Mutational Scanning (DMS) experiments relied\
+    \ on variant-specific barcoded libraries for sequencing. This method enabled DMS\
+    \ on large proteins and led to many great publications. Recently, efforts have\
+    \ increased to make use of the classic and more simple random fragmentation-based\
+    \ short-read sequencing (\u201Cshotgun-sequencing\u201D). This saves time and\
+    \ money and due to its simpler experimental design is less prone to mistakes.\
+    \ dmscore handles the essential computational steps, processing the raw FASTQ\
+    \ files and generating a count table of variants. Along the way, it provides multiple\
+    \ QC metrics, enabling users to quickly evaluate the success of their experimental\
+    \ setup."
+  force: false
   is_nfcore: true
+  name: deepmutscan
+  org: nf-core
+  outdir: .
+  version: 1.0.0
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9e9f0e1..d06777a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,10 +4,24 @@ repos:
     hooks:
       - id: prettier
         additional_dependencies:
-          - prettier@3.2.5
-
-  - repo: https://github.com/editorconfig-checker/editorconfig-checker.python
-    rev: "3.0.3"
+          - prettier@3.6.2
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v6.0.0
     hooks:
-      - id: editorconfig-checker
-        alias: ec
+      - id: trailing-whitespace
+        args: [--markdown-linebreak-ext=md]
+        exclude: |
+          (?x)^(
+            .*ro-crate-metadata.json$|
+            modules/nf-core/.*|
+            subworkflows/nf-core/.*|
+            .*\.snap$
+          )$
+      - id: end-of-file-fixer
+        exclude: |
+          (?x)^(
+            .*ro-crate-metadata.json$|
+            modules/nf-core/.*|
+            subworkflows/nf-core/.*|
+            .*\.snap$
+          )$
diff --git a/.prettierignore b/.prettierignore
index edd29f0..2255e3e 100644
--- a/.prettierignore
+++ b/.prettierignore
@@ -10,4 +10,5 @@ testing/
 testing*
 *.pyc
 bin/
+.nf-test/
 ro-crate-metadata.json
diff --git a/.prettierrc.yml b/.prettierrc.yml
index c81f9a7..07dbd8b 100644
--- a/.prettierrc.yml
+++ b/.prettierrc.yml
@@ -1 +1,6 @@
 printWidth: 120
+tabWidth: 4
+overrides:
+  - files: "*.{md,yml,yaml,html,css,scss,js,cff}"
+    options:
+      tabWidth: 2
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8d3f816..0039a1b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## v1.0.0dev - [date]
+## v1.0.0 - [date]
 
 Initial release of nf-core/deepmutscan, created with the [nf-core](https://nf-co.re/) template.
diff --git a/README.md b/README.md
index b95584e..d317632 100644
--- a/README.md
+++ b/README.md
@@ -5,25 +5,23 @@
 
-[![GitHub Actions CI Status](https://github.com/nf-core/deepmutscan/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/deepmutscan/actions/workflows/ci.yml)
-[![GitHub Actions Linting Status](https://github.com/nf-core/deepmutscan/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/deepmutscan/actions/workflows/linting.yml)
-[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/deepmutscan/results)
-[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)
+[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/codespaces/new/nf-core/deepmutscan)
+[![GitHub Actions CI Status](https://github.com/nf-core/deepmutscan/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/deepmutscan/actions/workflows/nf-test.yml)
+[![GitHub Actions Linting Status](https://github.com/nf-core/deepmutscan/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/deepmutscan/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/deepmutscan/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)
 [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)
 
-[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)
+[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)
+[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.4.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.4.1)
 [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
 [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
 [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)
 [![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/deepmutscan)
 
-[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23deepmutscan-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/deepmutscan)
-[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)
-[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)
-[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)
+
+[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23deepmutscan-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/deepmutscan)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)
 
 ## Introduction
 
-`nf-core/deepmutscan` is a workflow designed for the analysis of deep mutational scanning (DMS) data. DMS enables researchers to experimentally measure the fitness effects of thousands of genes or gene variants simultaneously, helping to classify disease causing mutants in human and animal populations, to learn the fundamental rules of virus evolution, protein architecture, splicing, small-molecule interactions and many other phenotypes.
+**nf-core/deepmutscan** is a workflow designed for the analysis of deep mutational scanning (DMS) data. DMS enables researchers to experimentally measure the fitness effects of thousands of genes or gene variants simultaneously, helping to classify disease causing mutants in human and animal populations, to learn the fundamental rules of virus evolution, protein architecture, splicing, small-molecule interactions and many other phenotypes.
 
 While DNA synthesis and sequencing technologies have advanced substantially, long open reading frame (ORF) targets still present major challenges for DMS studies. Shotgun DNA sequencing can be used to greatly speed up the inference of long ORF mutant fitness landscapes, theoretically at no expense in accuracy.
We have designed the `nf-core/deepmutscan` pipeline to unlock the power of shotgun sequencing based DMS studies on long ORFs, to simplify and standardise the complex bioinformatics steps involved in data processing of such experiments – from read alignment to QC reporting and fitness landscape inferences. @@ -88,25 +86,12 @@ nextflow run nf-core/deepmutscan \ --outdir ./results ``` -There are several optional [parameters](https://nf-co.re/deepmutscan/parameters), some of which are currently in development. For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/deepmutscan/usage). - ## Pipeline output -After execution, the pipeline creates the following directory structure: - -```folder title="output folder structure" -results/ -├── fastqc/ # Individual HTML reports for specified fastq files, raw sequencing QC -├── fitness/ # Merged variant count tables, fitness and error estimates, replicate correlations and heatmaps -├── intermediate_files/ # Raw alignments, raw and pre-filtered variant count tables, QC reports -├── library_QC/ # Sample-specific PDF visualizations: position-wise sequencing coverage, count heatmaps, etc. -├── multiqc/ # Shared HTML reports for all fastq files, raw sequencing QC -├── pipelineinfo/ # Nextflow helper files for timeline and summary report generation -├── timeline.html # Nextflow timeline for all tasks -└── report.html # Nextflow summary report incl. detailed CPU and memory usage per for all tasks -``` +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/deepmutscan/results) tab on the nf-core website pipeline page. -For a full overview of the output file types, please refer to the specific [documentation](https://nf-co.re/deepmutscan/output). +For more details about the output files and reports, please refer to the +[output documentation](https://nf-co.re/deepmutscan/output). ## Contributing @@ -122,7 +107,7 @@ If you are interested in getting involved as a developer, please consider joinin ## Credits -`nf-core/deepmutscan` was originally written by [Benjamin Wehnert](https://github.com/BenjaminWehnert1008) and [Max Stammnitz](https://github.com/MaximilianStammnitz) at the [Centre for Genomic Regulation, Barcelona](https://www.crg.eu/), with the generous support of an EMBO Long-term Postdoctoral Fellowship and a Marie Skłodowska-Curie grant by the European Union. +nf-core/deepmutscan was originally written by [Benjamin Wehnert](https://github.com/BenjaminWehnert1008) and [Max Stammnitz](https://github.com/MaximilianStammnitz) at the [Centre for Genomic Regulation, Barcelona](https://www.crg.eu/), with the generous support of an EMBO Long-term Postdoctoral Fellowship and a Marie Skłodowska-Curie grant by the European Union. If you use `nf-core/deepmutscan` in your analyses, please cite: @@ -133,13 +118,11 @@ Please also cite the `nf-core` framework: > 📄 Ewels et al., _Nature Biotechnology_, 2020 > [https://doi.org/10.1038/s41587-020-0439-x](https://doi.org/10.1038/s41587-020-0439-x) +For further information or help, don't hesitate to get in touch on the [Slack `#deepmutscan` channel](https://nfcore.slack.com/channels/deepmutscan) (you can join with [this invite](https://nf-co.re/join/slack)). + ## Scientific contact For scientific discussions around the use of this pipeline (e.g. 
on experimental design or sequencing data requirements), please feel free to get in touch with us directly: - Benjamin Wehnert — wehnertbenjamin@gmail.com - Maximilian Stammnitz — maximilian.stammnitz@crg.eu - -## CHANGELOG - -- [CHANGELOG](CHANGELOG.md) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 9a8562b..5845dd0 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,8 +1,7 @@ report_comment: > - This report has been generated by the nf-core/deepmutscan analysis pipeline. For information about - how to interpret these results, please see the documentation. + This report has been generated by the nf-core/deepmutscan + analysis pipeline. For information about how to interpret these results, please see the + documentation. report_section_order: "nf-core-deepmutscan-methods-description": order: -1000 diff --git a/assets/nf-core-deepmutscan_logo_light.png b/assets/nf-core-deepmutscan_logo_light.png index 9824bb2..d6c8e55 100644 Binary files a/assets/nf-core-deepmutscan_logo_light.png and b/assets/nf-core-deepmutscan_logo_light.png differ diff --git a/conf/base.config b/conf/base.config index a0daf47..8f313fe 100644 --- a/conf/base.config +++ b/conf/base.config @@ -15,7 +15,7 @@ process { memory = { 6.GB * task.attempt } time = { 4.h * task.attempt } - errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104 + 175) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' @@ -65,4 +65,8 @@ process { errorStrategy = 'retry' maxRetries = 2 } + withLabel: process_gpu { + ext.use_gpu = { workflow.profile.contains('gpu') } + accelerator = { workflow.profile.contains('gpu') ? 1 : null } + } } diff --git a/conf/modules.config b/conf/modules.config index 33b03f5..b0211d7 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -97,13 +97,13 @@ process { publishDir = [ path: "${params.outdir}/intermediate_files/processed_gatk_files", mode: 'copy', - saveAs: { filename -> - if (filename == 'versions.yml') return null - "${meta.id}/${filename}" + saveAs: { filename -> + if (filename == 'versions.yml') return null + "${meta.id}/${filename}" } ] } - + withName: /.*VISUALIZATION_.*/ { publishDir = [ path: { "${params.outdir}/library_QC" }, // e.g. results/library_QC diff --git a/docs/images/nf-core-deepmutscan_logo_dark.png b/docs/images/nf-core-deepmutscan_logo_dark.png index 8b387fa..398e0b2 100644 Binary files a/docs/images/nf-core-deepmutscan_logo_dark.png and b/docs/images/nf-core-deepmutscan_logo_dark.png differ diff --git a/docs/images/nf-core-deepmutscan_logo_light.png b/docs/images/nf-core-deepmutscan_logo_light.png index ed93fd9..f8bbe08 100644 Binary files a/docs/images/nf-core-deepmutscan_logo_light.png and b/docs/images/nf-core-deepmutscan_logo_light.png differ diff --git a/docs/output.md b/docs/output.md index bfe748d..4f57067 100644 --- a/docs/output.md +++ b/docs/output.md @@ -2,7 +2,6 @@ ## Introduction - The directories listed below will be created in the results directory after the pipeline has finished. 
All paths are relative to the top-level results directory: ```tree diff --git a/docs/usage.md b/docs/usage.md index 55eb737..9a7e86b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -45,7 +45,7 @@ Pipeline settings can be provided in a `yaml` or `json` file via `-params-file < The above pipeline run specified with a params file in yaml format: ```bash -nextflow run nf-core/deepmutscan -params-file params.yaml +nextflow run nf-core/deepmutscan -profile docker -params-file params.yaml ``` with: @@ -220,7 +220,7 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - `shifter` - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) - `charliecloud` - - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) + - A generic configuration profile to be used with [Charliecloud](https://charliecloud.io/) - `apptainer` - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) - `wave` diff --git a/log.txt b/log.txt index 38aa48b..dceb2c1 100644 --- a/log.txt +++ b/log.txt @@ -26,7 +26,7 @@ May-07 23:01:52.045 [main] DEBUG nextflow.Session - Run name: modest_coulomb May-07 23:01:52.046 [main] DEBUG nextflow.Session - Executor pool size: 8 May-07 23:01:52.053 [main] DEBUG nextflow.file.FilePorter - File porter settings maxRetries=3; maxTransfers=50; pollTimeout=null May-07 23:01:52.057 [main] DEBUG nextflow.util.ThreadPoolBuilder - Creating thread pool 'FileTransfer' minSize=10; maxSize=24; workQueue=LinkedBlockingQueue[10000]; allowCoreThreadTimeout=false -May-07 23:01:52.077 [main] DEBUG nextflow.cli.CmdRun - +May-07 23:01:52.077 [main] DEBUG nextflow.cli.CmdRun - Version: 24.04.4 build 5917 Created: 01-08-2024 07:05 UTC (09:05 CEST) System: Mac OS X 15.0 @@ -55,7 +55,7 @@ May-07 23:01:54.356 [main] DEBUG nextflow.script.IncludeDef - Loading included p May-07 23:01:54.357 [main] DEBUG nextflow.script.IncludeDef - Loading included plugin extensions with names: [validateParameters:validateParameters]; plugin Id: nf-schema May-07 23:01:54.359 [main] DEBUG nextflow.script.IncludeDef - Loading included plugin extensions with names: [paramsSummaryMap:paramsSummaryMap]; plugin Id: nf-schema May-07 23:01:54.360 [main] DEBUG nextflow.script.IncludeDef - Loading included plugin extensions with names: [samplesheetToList:samplesheetToList]; plugin Id: nf-schema -May-07 23:01:54.574 [main] INFO nextflow.Nextflow - +May-07 23:01:54.574 [main] INFO nextflow.Nextflow - ------------------------------------------------------ ,--./,-.  
___ __ __ __ ___ /,-._.--~' @@ -176,7 +176,7 @@ May-07 23:01:56.514 [main] DEBUG nextflow.script.ScriptRunner - Parsed script fi Script_a1878766b1a6b241: /Users/benjaminwehnert/dmscore/./workflows/../modules/local/dmsanalysis/processgatk.nf Script_37dcd664b2773148: /Users/benjaminwehnert/dmscore/./workflows/../modules/local/gatk/saturationmutagenesis.nf Script_25aa31c18d513c61: /Users/benjaminwehnert/dmscore/./workflows/../modules/nf-core/bwa/index/main.nf -May-07 23:01:56.514 [main] DEBUG nextflow.script.ScriptRunner - > Awaiting termination +May-07 23:01:56.514 [main] DEBUG nextflow.script.ScriptRunner - > Awaiting termination May-07 23:01:56.514 [main] DEBUG nextflow.Session - Session await May-07 23:01:56.605 [Actor Thread 1] DEBUG nextflow.sort.BigSort - Sort completed -- entries: 1; slices: 1; internal sort time: 0.001 s; external sort time: 0.011 s; total time: 0.012 s May-07 23:01:56.605 [Actor Thread 2] DEBUG nextflow.sort.BigSort - Sort completed -- entries: 1; slices: 1; internal sort time: 0.001 s; external sort time: 0.011 s; total time: 0.012 s @@ -198,8 +198,8 @@ May-07 23:02:00.613 [Task submitter] INFO nextflow.Session - [49/349fd9] Submit May-07 23:02:00.663 [TaskFinalizer-1] DEBUG nextflow.util.ThreadPoolBuilder - Creating thread pool 'PublishDir' minSize=10; maxSize=24; workQueue=LinkedBlockingQueue[10000]; allowCoreThreadTimeout=false May-07 23:02:00.756 [Actor Thread 15] INFO nextflow.file.FilePorter - Staging foreign file: https://raw.githubusercontent.com/BenjaminWehnert1008/test-datasets/dmsqc/dmsqc/pMS190_GID1A_SUNi_S2_1_R2_50k.fastq May-07 23:02:00.757 [Actor Thread 4] WARN nextflow.processor.TaskContext - Cannot serialize context map. Cause: java.lang.IllegalArgumentException: Unable to create serializer "com.esotericsoftware.kryo.serializers.FieldSerializer" for class: java.lang.ref.ReferenceQueue -- Resume will not work on this process -May-07 23:02:00.763 [Actor Thread 4] DEBUG nextflow.processor.TaskContext - Failed to serialize delegate map items: [ - 'meta':[Script_09ccfa79b2802f41$_runScript_closure1$_closure26] = +May-07 23:02:00.763 [Actor Thread 4] DEBUG nextflow.processor.TaskContext - Failed to serialize delegate map items: [ + 'meta':[Script_09ccfa79b2802f41$_runScript_closure1$_closure26] = 'pos_range':[java.lang.String] = 352-1383 '$':[java.lang.Boolean] = true 'wt_seq':[nextflow.processor.TaskPath] = GID1A_SUNi_ref_small.fasta @@ -380,8 +380,8 @@ May-07 23:02:13.067 [Task monitor] DEBUG n.processor.TaskPollingMonitor - Task c May-07 23:02:13.085 [Task submitter] DEBUG n.executor.local.LocalTaskHandler - Launch cmd line: /bin/bash -ue .command.run May-07 23:02:13.085 [Task submitter] INFO nextflow.Session - [67/c2a06b] Submitted process > NFCORE_DMSCORE:DMSCORE:BWA_MEM (gid1a_1_quality_1_pe) May-07 23:02:13.133 [Actor Thread 6] WARN nextflow.processor.TaskContext - Cannot serialize context map. 
Cause: java.lang.IllegalArgumentException: Unable to create serializer "com.esotericsoftware.kryo.serializers.FieldSerializer" for class: java.lang.ref.ReferenceQueue -- Resume will not work on this process -May-07 23:02:13.136 [Actor Thread 6] DEBUG nextflow.processor.TaskContext - Failed to serialize delegate map items: [ - 'meta':[Script_09ccfa79b2802f41$_runScript_closure1$_closure26] = +May-07 23:02:13.136 [Actor Thread 6] DEBUG nextflow.processor.TaskContext - Failed to serialize delegate map items: [ + 'meta':[Script_09ccfa79b2802f41$_runScript_closure1$_closure26] = 'pos_range':[java.lang.String] = 352-1383 'mutagenesis_type':[java.lang.String] = max_diff_to_wt '$':[java.lang.Boolean] = true diff --git a/main.nf b/main.nf index 5f339be..2df1740 100644 --- a/main.nf +++ b/main.nf @@ -72,7 +72,10 @@ workflow { params.monochrome_logs, args, params.outdir, - params.input + params.input, + params.help, + params.help_full, + params.show_hidden ) // diff --git a/modules.json b/modules.json index 5c07b2b..966776f 100644 --- a/modules.json +++ b/modules.json @@ -17,12 +17,12 @@ }, "fastqc": { "branch": "master", - "git_sha": "dc94b6ee04a05ddb9f7ae050712ff30a13149164", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", + "git_sha": "e10b76ca0c66213581bec2833e30d31f239dec0b", "installed_by": ["modules"] } } @@ -31,17 +31,17 @@ "nf-core": { "utils_nextflow_pipeline": { "branch": "master", - "git_sha": "c2b22d85f30a706a3073387f30380704fcae013b", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "51ae5406a030d4da1e49e4dab49756844fdd6c7a", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { "branch": "master", - "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", + "git_sha": "4b406a74dc0449c0401ed87d5bfff4252fd277fd", "installed_by": ["subworkflows"] } } diff --git a/modules/local/bamprocessing/premerge.nf b/modules/local/bamprocessing/premerge.nf index af7e16f..4a66f85 100644 --- a/modules/local/bamprocessing/premerge.nf +++ b/modules/local/bamprocessing/premerge.nf @@ -36,7 +36,7 @@ process PREMERGE { premerge: \$(samtools --version |& sed '1!d ; s/samtools //') END_VERSIONS """ - + stub: def prefix = task.ext.prefix ?: "${meta.id}" """ diff --git a/modules/local/dmsanalysis/aaseq.nf b/modules/local/dmsanalysis/aaseq.nf index 8d02563..dde104f 100644 --- a/modules/local/dmsanalysis/aaseq.nf +++ b/modules/local/dmsanalysis/aaseq.nf @@ -3,8 +3,8 @@ process DMSANALYSIS_AASEQ { label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' - ? 'community.wave.seqera.io/library/bioconductor-biostrings_r-base_r-biocmanager_r-dplyr_pruned:ce2ba7ad7f6e7f2c' + container "${ workflow.containerEngine == 'singularity' + ? 
'community.wave.seqera.io/library/bioconductor-biostrings_r-base_r-biocmanager_r-dplyr_pruned:ce2ba7ad7f6e7f2c' : 'community.wave.seqera.io/library/bioconductor-biostrings_r-base_r-biocmanager_r-dplyr_pruned:0fd2e39a5bf2ecaa' }" input: @@ -22,11 +22,11 @@ process DMSANALYSIS_AASEQ { script: """ start_stop_codon="$pos_range" - + R_version=\$(R --version | head -n 1 | sed 's/^R version //') - + Rscript -e "source('$script'); aa_seq('$wt_seq', '\$start_stop_codon', 'aa_seq.txt')" - + # Extract R base and package versions R_VERSION=\$(R --version | head -n 1 | sed -E 's/^R version ([0-9.]+).*/\\1/') BIOSTRINGS_VERSION=\$(Rscript -e "packageVersion('Biostrings')" | grep -Eo '[0-9]+(\\.[0-9]+)+') @@ -34,7 +34,7 @@ process DMSANALYSIS_AASEQ { DMSANALYSIS_AASEQ: r-base: \$R_VERSION biostrings: \$BIOSTRINGS_VERSION - END_VERSIONS + END_VERSIONS """ stub: diff --git a/modules/local/dmsanalysis/bin/SeqDepth_simulation.R b/modules/local/dmsanalysis/bin/SeqDepth_simulation.R index 2207429..ec832d3 100644 --- a/modules/local/dmsanalysis/bin/SeqDepth_simulation.R +++ b/modules/local/dmsanalysis/bin/SeqDepth_simulation.R @@ -127,5 +127,3 @@ SeqDepth_simulation_plot <- function(prefiltered_gatk_path, possible_mutations_p #SeqDepth - - diff --git a/modules/local/dmsanalysis/bin/complete_prefiltered_gatk.R b/modules/local/dmsanalysis/bin/complete_prefiltered_gatk.R index 1e16adc..88cc6b5 100644 --- a/modules/local/dmsanalysis/bin/complete_prefiltered_gatk.R +++ b/modules/local/dmsanalysis/bin/complete_prefiltered_gatk.R @@ -76,4 +76,3 @@ complete_prefiltered_gatk <- function(possible_nnk_path, prefiltered_gatk_path, # Example call #complete_prefiltered_gatk("/Users/benjaminwehnert/CRG/DMS_QC/testing_data/possible_NNK_mutations.csv", "/Users/benjaminwehnert/CRG/DMS_QC/testing_data/gatk_filtered_by_codon_library.csv", "/Users/benjaminwehnert/CRG/DMS_QC/testing_data/testing_outputs") - diff --git a/modules/local/dmsanalysis/bin/counts_heatmap.R b/modules/local/dmsanalysis/bin/counts_heatmap.R index 68a85a7..0814daa 100644 --- a/modules/local/dmsanalysis/bin/counts_heatmap.R +++ b/modules/local/dmsanalysis/bin/counts_heatmap.R @@ -8,8 +8,8 @@ counts_heatmap <- function(input_csv_path, threshold = 3, output_pdf_path, img_f # Inner function to add padding to the last row, adding 21 amino acids per position pad_heatmap_data_long <- function(heatmap_data_long, min_non_na_value, num_positions_per_row = 75) { - all_amino_acids <- c("G", "A", "V", "L", "M", "I", "F", - "Y", "W", "K", "R", "H", "D", "E", + all_amino_acids <- c("G", "A", "V", "L", "M", "I", "F", + "Y", "W", "K", "R", "H", "D", "E", "S", "T", "C", "N", "Q", "P", "*") max_position <- max(heatmap_data_long$position) @@ -70,8 +70,8 @@ counts_heatmap <- function(input_csv_path, threshold = 3, output_pdf_path, img_f mutate(synonymous = mut_aa == wt_aa) # Define the correct order of the amino acids - amino_acid_order <- rev(c("G", "A", "V", "L", "M", "I", "F", - "Y", "W", "K", "R", "H", "D", "E", + amino_acid_order <- rev(c("G", "A", "V", "L", "M", "I", "F", + "Y", "W", "K", "R", "H", "D", "E", "S", "T", "C", "N", "Q", "P", "*")) heatmap_data_long <- heatmap_data_long %>% diff --git a/modules/local/dmsanalysis/bin/counts_per_cov_heatmap.R b/modules/local/dmsanalysis/bin/counts_per_cov_heatmap.R index 2a98865..ea7b36d 100644 --- a/modules/local/dmsanalysis/bin/counts_per_cov_heatmap.R +++ b/modules/local/dmsanalysis/bin/counts_per_cov_heatmap.R @@ -8,8 +8,8 @@ counts_per_cov_heatmap <- function(input_csv_path, threshold = 3, output_pdf_pat # Inner
function to add padding to the last row, adding 21 amino acids per position pad_heatmap_data_long <- function(heatmap_data_long, min_non_na_value, num_positions_per_row = 75) { - all_amino_acids <- c("G", "A", "V", "L", "M", "I", "F", - "Y", "W", "K", "R", "H", "D", "E", + all_amino_acids <- c("G", "A", "V", "L", "M", "I", "F", + "Y", "W", "K", "R", "H", "D", "E", "S", "T", "C", "N", "Q", "P", "*") max_position <- max(heatmap_data_long$position) @@ -70,8 +70,8 @@ counts_per_cov_heatmap <- function(input_csv_path, threshold = 3, output_pdf_pat mutate(synonymous = mut_aa == wt_aa) # Define the correct order of the amino acids - amino_acid_order <- rev(c("G", "A", "V", "L", "M", "I", "F", - "Y", "W", "K", "R", "H", "D", "E", + amino_acid_order <- rev(c("G", "A", "V", "L", "M", "I", "F", + "Y", "W", "K", "R", "H", "D", "E", "S", "T", "C", "N", "Q", "P", "*")) heatmap_data_long <- heatmap_data_long %>% diff --git a/modules/local/dmsanalysis/bin/detect_codons.R b/modules/local/dmsanalysis/bin/detect_codons.R index 0c35b3b..1d1d36f 100644 --- a/modules/local/dmsanalysis/bin/detect_codons.R +++ b/modules/local/dmsanalysis/bin/detect_codons.R @@ -61,5 +61,3 @@ find_start_stop <- function(input_string, start_codon = "ATG", stop_codons = c(" } #find_start_stop("/Users/benjaminwehnert/CRG/DMS_QC/testing_data/MORtn5_reference.fa") - - diff --git a/modules/local/dmsanalysis/bin/dimsum_experimentalDesign.R b/modules/local/dmsanalysis/bin/dimsum_experimentalDesign.R index 123593d..89f0ce1 100644 --- a/modules/local/dmsanalysis/bin/dimsum_experimentalDesign.R +++ b/modules/local/dmsanalysis/bin/dimsum_experimentalDesign.R @@ -6,17 +6,17 @@ make_dimsum_experimental_design <- function(samplesheet_csv, out_path = "experim # ---- read & normalize ---- ss <- read.csv(samplesheet_csv, stringsAsFactors = FALSE, check.names = FALSE) names(ss) <- tolower(names(ss)) - + # tolerate missing file2 column (single-end) if (!"file2" %in% names(ss)) ss$file2 <- "" - + required <- c("sample", "type", "replicate", "file1", "file2") missing <- setdiff(required, names(ss)) if (length(missing) > 0) stop("Samplesheet missing columns: ", paste(missing, collapse = ", ")) - + # coerce types ss$replicate <- as.integer(ss$replicate) - + # ---- derive sample_name strategy ---- # If only one biological sample present (e.g. one protein), use "input1", "output2", ...
# If multiple biological samples present, prefix with 'sample' to avoid collisions: @@ -27,7 +27,7 @@ make_dimsum_experimental_design <- function(samplesheet_csv, out_path = "experim } else { sample_name <- paste0(ss$type, ss$replicate) } - + # ---- build DiMSum columns ---- experiment_replicate <- ss$replicate selection_id <- ifelse(ss$type == "input", 0L, @@ -36,11 +36,11 @@ make_dimsum_experimental_design <- function(samplesheet_csv, out_path = "experim selection_replicate <- ifelse(ss$type == "output", 1L, NA_integer_) # assume one technical batch technical_replicate <- rep(1L, nrow(ss)) - + pair1 <- basename(ss$file1) # keep empty string for single-end / missing file2 pair2 <- ifelse(is.na(ss$file2) | ss$file2 == "", "", basename(ss$file2)) - + ed <- data.frame( sample_name = sample_name, experiment_replicate = experiment_replicate, @@ -51,7 +51,7 @@ make_dimsum_experimental_design <- function(samplesheet_csv, out_path = "experim pair2 = pair2, stringsAsFactors = FALSE ) - + # ---- order rows: by sample (if multiple), type (input, output, quality), then replicate ---- type_rank <- match(ss$type, c("input", "output", "quality")) ord <- if (multi_base) { @@ -61,9 +61,8 @@ make_dimsum_experimental_design <- function(samplesheet_csv, out_path = "experim } ed <- ed[ord, , drop = FALSE] rownames(ed) <- NULL - + # ---- write & return ---- write.table(ed, file = out_path, sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE, na = "") return(ed) } - diff --git a/modules/local/dmsanalysis/bin/filter_gatk_by_codon_library.R b/modules/local/dmsanalysis/bin/filter_gatk_by_codon_library.R index 15ee22d..b05d391 100644 --- a/modules/local/dmsanalysis/bin/filter_gatk_by_codon_library.R +++ b/modules/local/dmsanalysis/bin/filter_gatk_by_codon_library.R @@ -120,5 +120,3 @@ filter_gatk_by_codon_library <- function(gatk_file_path, codon_library_path, out # example #filter_gatk_by_codon_library("/Users/benjaminwehnert/CRG/DMS_QC/testing_data/raw_gatk.csv", codon_library = "/Users/benjaminwehnert/CRG/DMS_QC/testing_data/possible_NNK_mutations.csv", output_file_path = "/Users/benjaminwehnert/CRG/DMS_QC/testing_data/gatk_filtered_by_codon_library.csv") ### this one's is correct for this data set #filter_gatk_by_codon_library("/Users/benjaminwehnert/CRG/DMS_QC/testing_data/raw_gatk.csv", codon_library = "/Users/benjaminwehnert/CRG/DMS_QC/testing_data/testing_outputs/possible_NNK_mutations_taylors_nnk_and_nns.csv", output_file_path = "/Users/benjaminwehnert/CRG/DMS_QC/testing_data/testing_outputs/gatk_filtered_by_complete_codon_library.csv") - - diff --git a/modules/local/dmsanalysis/bin/find_syn_mutation.R b/modules/local/dmsanalysis/bin/find_syn_mutation.R index 716f4f9..0e593d0 100644 --- a/modules/local/dmsanalysis/bin/find_syn_mutation.R +++ b/modules/local/dmsanalysis/bin/find_syn_mutation.R @@ -21,7 +21,7 @@ pick_synonymous_wt_from_range <- function(wt_fasta, counts_merged_tsv, pos_range start_pos <- as.integer(pr[1]); end_pos <- as.integer(pr[2]) if (is.na(start_pos) || is.na(end_pos) || start_pos < 1L || end_pos < start_pos) stop("Invalid pos_range: ", pos_range) - + ## ---- WT window ---- wt_set <- Biostrings::readDNAStringSet(wt_fasta) if (length(wt_set) != 1L) stop("WT FASTA must contain exactly one sequence.") @@ -31,57 +31,57 @@ pick_synonymous_wt_from_range <- function(wt_fasta, counts_merged_tsv, pos_range if ((wt_len %% 3) != 0) stop("Provided window length is not divisible by 3: ", wt_len) wt_aa <- Biostrings::translate(wt_subseq, if.fuzzy.codon = "X") wt_chars <- 
strsplit(wt_seq_chr, "", fixed = TRUE)[[1]] - + ## ---- counts ---- df <- utils::read.delim(counts_merged_tsv, sep = "\t", header = TRUE, stringsAsFactors = FALSE, check.names = FALSE) if (!"nt_seq" %in% names(df)) stop("counts_merged_tsv must have a 'nt_seq' column.") - + df$nt_seq <- toupper(df$nt_seq) keep_len <- nchar(df$nt_seq) == wt_len if (!any(keep_len)) stop("No sequences match WT window length (", wt_len, ").") if (!all(keep_len)) df <- df[keep_len, , drop = FALSE] - + # input columns & mean (works with 1+ replicates) input_cols <- grep("^input", names(df), value = TRUE) if (length(input_cols) == 0L) stop("No input columns found (expect names starting with 'input').") input_mat <- as.data.frame(lapply(df[, input_cols, drop = FALSE], function(x) as.numeric(as.character(x)))) input_mean <- if (length(input_cols) == 1L) input_mat[[1]] else rowMeans(as.matrix(input_mat), na.rm = TRUE) - + ## ---- synonymous filter ---- var_set <- Biostrings::DNAStringSet(df$nt_seq) var_aa <- Biostrings::translate(var_set, if.fuzzy.codon = "X") syn_idx <- which(as.character(var_aa) == as.character(wt_aa)) if (length(syn_idx) == 0L) stop("No fully-synonymous variants found relative to WT translation.") - + # helpers mismatch_positions <- function(seq_nt_chars) which(seq_nt_chars != wt_chars) # 1-based positions codon_index <- function(pos_vec) floor((pos_vec - 1L) / 3L) # 0-based codon bin - + # preference 1: exactly 2 mismatches, both within the same codon cand_two_one <- Filter(function(i) { vchars <- strsplit(df$nt_seq[i], "", fixed = TRUE)[[1]] pos <- mismatch_positions(vchars) length(pos) == 2L && length(unique(codon_index(pos))) == 1L }, syn_idx) - + choose_best <- function(idx_vec) idx_vec[ which.max(input_mean[idx_vec]) ] - + if (length(cand_two_one) > 0L) { best_i <- choose_best(cand_two_one) return(as.character(df$nt_seq[best_i])) } - + # preference 2 (fallback): exactly 1 mismatch (still synonymous) cand_one <- Filter(function(i) { vchars <- strsplit(df$nt_seq[i], "", fixed = TRUE)[[1]] length(mismatch_positions(vchars)) == 1L }, syn_idx) - + if (length(cand_one) > 0L) { best_i <- choose_best(cand_one) return(as.character(df$nt_seq[best_i])) } - + stop("No suitable synonymous variant found: neither 2-in-1-codon nor 1-nt synonymous candidates present.") } diff --git a/modules/local/dmsanalysis/bin/fitness_calculation.R b/modules/local/dmsanalysis/bin/fitness_calculation.R index b51e92c..5100cef 100644 --- a/modules/local/dmsanalysis/bin/fitness_calculation.R +++ b/modules/local/dmsanalysis/bin/fitness_calculation.R @@ -57,8 +57,8 @@ compute_aa_hamming <- function(merged.counts, wt.seq.aa) { # name the mutations name_mutations <- function(merged.counts, wt.seq.aa) { - merged.counts <- cbind("wt aa" = rep(NA, nrow(merged.counts)), - "pos" = rep(NA, nrow(merged.counts)), + merged.counts <- cbind("wt aa" = rep(NA, nrow(merged.counts)), + "pos" = rep(NA, nrow(merged.counts)), "mut aa" = rep(NA, nrow(merged.counts)), merged.counts) for (i in 1:nrow(merged.counts)){ if(merged.counts$aa_ham[i] == 0){ @@ -79,11 +79,11 @@ name_mutations <- function(merged.counts, wt.seq.aa) { aggregate_by_aa <- function(merged.counts) { ## find stops, WT and WT merged.counts <- cbind(merged.counts, - "wt" = rep(NA, nrow(merged.counts)), + "wt" = rep(NA, nrow(merged.counts)), "stop" = rep(NA, nrow(merged.counts))) merged.counts$wt[which(merged.counts$nt_ham == 0)] <- TRUE merged.counts$stop[which(merged.counts$`mut aa` == "*")] <- TRUE - + ## aggregate counts of variants which are identical on the aa (but not nt) level 
## exception: wildtype ones ## thereby shrinking the matrix @@ -116,34 +116,34 @@ calc_raw_fitness <- function(merged.counts, exp.design) { merged.counts <- cbind(merged.counts, rep(NA, nrow(merged.counts))) colnames(merged.counts)[ncol(merged.counts)] <- paste0("raw_fitness_rep", i) } - + ## calculate raw fitness of all variants vs. WT variant for (i in 1:reps){ - + ### collect counts tmp.input.counts <- merged.counts[,paste0("input", i)] tmp.output.counts <- merged.counts[,paste0("output", i)] - + ### add pseudo-count to zero-outputs (if the corresponding input count is non-zero) tmp.output.counts[which(tmp.output.counts == 0 & tmp.input.counts != 0)] <- 1 - + ### take logs - tmp.wt.log.ratio <- log(tmp.output.counts[which(merged.counts$wt == TRUE)] / + tmp.wt.log.ratio <- log(tmp.output.counts[which(merged.counts$wt == TRUE)] / tmp.input.counts[which(merged.counts$wt == TRUE)]) - tmp.fitness <- log(tmp.output.counts / + tmp.fitness <- log(tmp.output.counts / tmp.input.counts) - tmp.wt.log.ratio - + ### uncertain values to NA tmp.fitness[which(is.na(tmp.fitness) == TRUE)] <- NA tmp.fitness[which(tmp.fitness == "Inf")] <- NA - + ### add to table merged.counts[,c(ncol(merged.counts) - reps + i)] <- tmp.fitness - + ### clean up rm(tmp.fitness, tmp.wt.log.ratio, tmp.output.counts, tmp.input.counts) } - + list(merged.counts = merged.counts, reps = reps) } @@ -151,50 +151,50 @@ calc_raw_fitness <- function(merged.counts, exp.design) { rescale_and_summarize <- function(merged.counts, reps) { ## center the raw fitness distributions on 0 (median of wildtype synonymous) and -1 (median of stops) for (i in 1:reps){ - + merged.counts <- cbind(merged.counts, rep(NA, nrow(merged.counts))) colnames(merged.counts)[ncol(merged.counts)] <- paste0("rescaled_fitness_rep", i) - + ### fetch the key counts tmp.wt.fitness <- merged.counts[which(merged.counts$aa_ham == 0),ncol(merged.counts) - reps] tmp.stop.fitness <- merged.counts[which(merged.counts$stop == TRUE),ncol(merged.counts) - reps] - + ### rescale tmp.wt.fitness.med <- median(tmp.wt.fitness, na.rm = TRUE) tmp.stop.fitness.med <- median(tmp.stop.fitness, na.rm = TRUE) if(tmp.stop.fitness.med >= tmp.wt.fitness.med){ - + tmp.wt.fitness.mean <- mean(tmp.wt.fitness, na.rm = TRUE) tmp.stop.fitness.mean <- mean(tmp.stop.fitness, na.rm = TRUE) lm.rescale <- lm(c(0, -1) ~ c(tmp.wt.fitness.mean, tmp.stop.fitness.mean)) merged.counts[,ncol(merged.counts)] <- merged.counts[,ncol(merged.counts) - reps] * lm.rescale$coefficients[[2]] + lm.rescale$coefficients[[1]] - rm(tmp.wt.fitness, tmp.stop.fitness, - tmp.wt.fitness.mean, tmp.stop.fitness.mean, + rm(tmp.wt.fitness, tmp.stop.fitness, + tmp.wt.fitness.mean, tmp.stop.fitness.mean, tmp.wt.fitness.med, tmp.stop.fitness.med, lm.rescale) next - + }else{ - + lm.rescale <- lm(c(0, -1) ~ c(tmp.wt.fitness.med, tmp.stop.fitness.med)) merged.counts[,ncol(merged.counts)] <- merged.counts[,ncol(merged.counts) - reps] * lm.rescale$coefficients[[2]] + lm.rescale$coefficients[[1]] - rm(tmp.wt.fitness, tmp.stop.fitness, + rm(tmp.wt.fitness, tmp.stop.fitness, tmp.wt.fitness.med, tmp.stop.fitness.med, lm.rescale) next - + } } - + ## calculate fitness mean and standard deviation across replicates merged.counts <- cbind(merged.counts, "mean fitness" = rep(NA, nrow(merged.counts)), "fitness sd" = rep(NA, nrow(merged.counts))) - + if(reps == 1){ - + merged.counts$`mean fitness` <- merged.counts[,ncol(merged.counts) - 2] - + }else if(reps > 1){ - + merged.counts$`mean fitness` <- apply(merged.counts[,c(ncol(merged.counts) - 2*reps + 1, 
ncol(merged.counts) - reps)], 1, mean, @@ -203,9 +203,9 @@ rescale_and_summarize <- function(merged.counts, reps) { 1, sd, na.rm = TRUE) - + } - + merged.counts } @@ -225,47 +225,47 @@ run_fitness_estimation <- function(counts_path, output_path) { ## 1. Import key files ## ######################### - + merged.counts <- read.table(counts_path, sep = "\t", header = TRUE, check.names = FALSE) exp.design <- read.table(design_path, sep = "\t", header = TRUE, check.names = FALSE) wt.seq <- DNAString(as.character(read.table(wt_seq_path))) wt.seq.aa <- translate(wt.seq) - + ## 2. Pre-processing the count table ## ####################################### - + ## calculate nt hamming distances from the specified WT merged.counts <- compute_nt_hamming(merged.counts, wt.seq) - + ## translate sequences merged.counts <- add_aa_seq(merged.counts) - + ## calculate AA hamming distances from the WT merged.counts <- compute_aa_hamming(merged.counts, wt.seq.aa) - + ## name the mutations merged.counts <- name_mutations(merged.counts, wt.seq.aa) - + ## find stops, WT and WT; aggregate AA-identical variants (except WT) merged.counts <- aggregate_by_aa(merged.counts) - + ## 3. Raw fitness calculations ## ################################# fitness_res <- calc_raw_fitness(merged.counts, exp.design) merged.counts <- fitness_res$merged.counts reps <- fitness_res$reps - + ## 4. Fitness and error refinements ## ###################################### merged.counts <- rescale_and_summarize(merged.counts, reps) - + ## clean up rm(reps) - + ## export write.table(merged.counts, output_path, col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t", na = "") - + invisible(merged.counts) } @@ -277,25 +277,25 @@ run_fitness_estimation <- function(counts_path, # R version 4.5.1 (2025-06-13) # Platform: aarch64-apple-darwin20 # Running under: macOS Sonoma 14.6.1 -# +# # Matrix products: default -# BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib +# BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib # LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1 -# +# # locale: # [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 -# +# # time zone: Europe/Madrid # tzcode source: internal -# +# # attached base packages: -# [1] stats4 stats graphics grDevices utils datasets methods base -# +# [1] stats4 stats graphics grDevices utils datasets methods base +# # other attached packages: -# [1] Biostrings_2.76.0 GenomeInfoDb_1.44.2 XVector_0.48.0 IRanges_2.42.0 S4Vectors_0.46.0 -# [6] BiocGenerics_0.54.0 generics_0.1.4 -# +# [1] Biostrings_2.76.0 GenomeInfoDb_1.44.2 XVector_0.48.0 IRanges_2.42.0 S4Vectors_0.46.0 +# [6] BiocGenerics_0.54.0 generics_0.1.4 +# # loaded via a namespace (and not attached): -# [1] httr_1.4.7 compiler_4.5.1 R6_2.6.1 tools_4.5.1 -# [5] GenomeInfoDbData_1.2.14 rstudioapi_0.17.1 crayon_1.5.3 UCSC.utils_1.4.0 -# [9] jsonlite_2.0.0 \ No newline at end of file +# [1] httr_1.4.7 compiler_4.5.1 R6_2.6.1 tools_4.5.1 +# [5] GenomeInfoDbData_1.2.14 rstudioapi_0.17.1 crayon_1.5.3 UCSC.utils_1.4.0 +# [9] jsonlite_2.0.0 diff --git a/modules/local/dmsanalysis/bin/fitness_heatmap.R b/modules/local/dmsanalysis/bin/fitness_heatmap.R index 28a405a..04c3cee 100644 --- a/modules/local/dmsanalysis/bin/fitness_heatmap.R +++ b/modules/local/dmsanalysis/bin/fitness_heatmap.R @@ -123,8 +123,8 @@ build_heatmap_long <- function(df, } 
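To make the arithmetic in calc_raw_fitness() and rescale_and_summarize() above concrete, here is a minimal R illustration with invented counts (not pipeline output): a variant's raw fitness is its log output/input ratio minus the wild-type's, and a two-point linear fit then maps the wild-type median to 0 and the stop median to -1.

# Toy illustration of the fitness math above; all counts are invented.
input  <- c(wt = 1200, varA = 800, varStop = 900)
output <- c(wt = 1500, varA = 600, varStop = 90)
output[output == 0 & input != 0] <- 1                        # pseudo-count, as in the script
raw <- log(output / input) - log(output["wt"] / input["wt"]) # log-ratio fitness vs. WT
wt_med   <- raw["wt"]       # stands in for median(WT-synonymous fitness); 0 by construction
stop_med <- raw["varStop"]  # stands in for median(stop fitness)
lm.rescale <- lm(c(0, -1) ~ c(wt_med, stop_med))             # exact two-point linear map
rescaled <- raw * lm.rescale$coefficients[[2]] + lm.rescale$coefficients[[1]]
# rescaled["wt"] is 0, rescaled["varStop"] is -1, and varA lands in between (about -0.2)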
syn_segments <- function(d, positions_per_row = 75) { - amino_order <- rev(c("G", "A", "V", "L", "M", "I", "F", - "Y", "W", "K", "R", "H", "D", "E", + amino_order <- rev(c("G", "A", "V", "L", "M", "I", "F", + "Y", "W", "K", "R", "H", "D", "E", "S", "T", "C", "N", "Q", "P", "*")) d %>% mutate( @@ -158,8 +158,8 @@ white_tail_rects <- function(d, positions_per_row = 75) { } plot_heatmap <- function(d, title_text, positions_per_row = 75) { - amino_order <- rev(c("G", "A", "V", "L", "M", "I", "F", - "Y", "W", "K", "R", "H", "D", "E", + amino_order <- rev(c("G", "A", "V", "L", "M", "I", "F", + "Y", "W", "K", "R", "H", "D", "E", "S", "T", "C", "N", "Q", "P", "*")) d <- d %>% mutate(mut_aa = factor(mut_aa, levels = amino_order)) diff --git a/modules/local/dmsanalysis/bin/gatk_to_fitness.R b/modules/local/dmsanalysis/bin/gatk_to_fitness.R index 5004d97..ff150b3 100644 --- a/modules/local/dmsanalysis/bin/gatk_to_fitness.R +++ b/modules/local/dmsanalysis/bin/gatk_to_fitness.R @@ -168,13 +168,3 @@ generate_fitness_input <- function(wt_seq_path, gatk_file, pos_range, output_fil # "23-1225", # "/Users/benjaminwehnert/CRG/DMS_QC/testing_data/testing_outputs/dimsum_input.tsv" # ) - - - - - - - - - - diff --git a/modules/local/dmsanalysis/bin/global_position_biases_counts_and_counts_per_cov.R b/modules/local/dmsanalysis/bin/global_position_biases_counts_and_counts_per_cov.R index 142e1c9..51c5283 100644 --- a/modules/local/dmsanalysis/bin/global_position_biases_counts_and_counts_per_cov.R +++ b/modules/local/dmsanalysis/bin/global_position_biases_counts_and_counts_per_cov.R @@ -147,17 +147,3 @@ position_biases <- function(prefiltered_gatk_path, aa_seq_path, window_size = 10 # Example call to the function #position_biases("/Users/benjaminwehnert/CRG/DMS_QC/testing_data/gatk_filtered_by_codon_library.csv", "/Users/benjaminwehnert/CRG/DMS_QC/testing_data/aa_seq.txt", window_size = 18, output_path_folder = "/Users/benjaminwehnert/CRG/DMS_QC/testing_data/testing_outputs") - - - - - - - - - - - - - - diff --git a/modules/local/dmsanalysis/bin/global_position_biases_cov.R b/modules/local/dmsanalysis/bin/global_position_biases_cov.R index 857f4f2..2f2235c 100644 --- a/modules/local/dmsanalysis/bin/global_position_biases_cov.R +++ b/modules/local/dmsanalysis/bin/global_position_biases_cov.R @@ -80,7 +80,3 @@ position_biases <- function(prefiltered_gatk_path, aa_seq_path, window_size = 10 # Example call to the function #position_biases("/Users/benjaminwehnert/CRG/DMS_QC/testing_data/gatk_filtered_by_codon_library.csv", "/Users/benjaminwehnert/CRG/DMS_QC/testing_data/aa_seq.txt", window_size = 18, output_path_folder = "/Users/benjaminwehnert/CRG/DMS_QC/testing_data/testing_outputs/rolling_coverage.pdf", targeted_counts_per_aa_variant = 15) - - - - diff --git a/modules/local/dmsanalysis/bin/install_packages.R b/modules/local/dmsanalysis/bin/install_packages.R index 2f98481..7846c92 100644 --- a/modules/local/dmsanalysis/bin/install_packages.R +++ b/modules/local/dmsanalysis/bin/install_packages.R @@ -79,4 +79,3 @@ system("xml2-config --version") system("zlib-flate -version") system("ldconfig -p | grep zlib") - diff --git a/modules/local/dmsanalysis/bin/logdiff.R b/modules/local/dmsanalysis/bin/logdiff.R index 5982d6d..246cffe 100644 --- a/modules/local/dmsanalysis/bin/logdiff.R +++ b/modules/local/dmsanalysis/bin/logdiff.R @@ -144,6 +144,3 @@ library(scales) # # Example call # logdiff_plot_codon_mut("/Users/benjaminwehnert/CRG/DMS_QC/testing_data/testing_outputs/completed_prefiltered_gatk.csv", 
"/Users/benjaminwehnert/CRG/DMS_QC/testing_data/testing_outputs") # - - - diff --git a/modules/local/dmsanalysis/bin/low_count_variants.R b/modules/local/dmsanalysis/bin/low_count_variants.R index ef1902b..03cf8a7 100644 --- a/modules/local/dmsanalysis/bin/low_count_variants.R +++ b/modules/local/dmsanalysis/bin/low_count_variants.R @@ -125,6 +125,3 @@ ggplot(proportion_data, aes(x = group, y = proportion_with_pur_mutation, fill = labs(x = "Group", y = "Proportion of Variants", title = "Proportion of Variants with at Least One Pur_pyr or Pur_pur Mutation") + scale_fill_manual(values = c("Top 90%" = "steelblue", "Bottom 10%" = "tomato")) + theme_minimal() - - - diff --git a/modules/local/dmsanalysis/bin/merge_counts.R b/modules/local/dmsanalysis/bin/merge_counts.R index cfe5275..2956351 100644 --- a/modules/local/dmsanalysis/bin/merge_counts.R +++ b/modules/local/dmsanalysis/bin/merge_counts.R @@ -6,7 +6,7 @@ merge_dimsum_counts <- function(input_paths, output_paths, out_path = "counts.tsv") { # input_paths, output_paths: character vectors of file paths # out_path: output TSV path - + # Helper to read a 2-col TSV without header -> data.frame(nt_seq, count) read_counts <- function(fp) { df <- utils::read.table( @@ -17,17 +17,17 @@ merge_dimsum_counts <- function(input_paths, output_paths, out_path = "counts.ts ) df } - + # Read all inputs / outputs input_list <- lapply(input_paths, read_counts) output_list <- lapply(output_paths, read_counts) - + # Collect universe of sequences all_seqs <- unique(c( unlist(lapply(input_list, function(x) x$nt_seq)), unlist(lapply(output_list, function(x) x$nt_seq)) )) - + # Pre-allocate output frame n_in <- length(input_list) n_out <- length(output_list) @@ -43,7 +43,7 @@ merge_dimsum_counts <- function(input_paths, output_paths, out_path = "counts.ts stringsAsFactors = FALSE, check.names = FALSE ) names(out) <- col_names - + # Fill inputs if (n_in > 0) { for (i in seq_len(n_in)) { @@ -52,7 +52,7 @@ merge_dimsum_counts <- function(input_paths, output_paths, out_path = "counts.ts out[idx, paste0("input", i)] <- df$count } } - + # Fill outputs if (n_out > 0) { for (j in seq_len(n_out)) { @@ -61,7 +61,7 @@ merge_dimsum_counts <- function(input_paths, output_paths, out_path = "counts.ts out[idx, paste0("output", j)] <- df$count } } - + # Write TSV with header, no row names, no quotes utils::write.table(out, file = out_path, sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE) invisible(out) @@ -72,7 +72,7 @@ merge_dimsum_counts <- function(input_paths, output_paths, out_path = "counts.ts # Rscript merge_counts.R --inputs ... --outputs ... --out counts.tsv if (sys.nframe() == 0) { args <- commandArgs(trailingOnly = TRUE) - + # simple flag parser that supports space-separated lists get_vals <- function(flag) { if (!(flag %in% args)) return(character(0)) @@ -82,15 +82,15 @@ if (sys.nframe() == 0) { if (stop <= start) return(character(0)) args[(start + 1):stop] } - + input_paths <- get_vals("--inputs") output_paths <- get_vals("--outputs") out_path <- get_vals("--out") out_path <- if (length(out_path)) out_path[1] else "counts.tsv" - + if (!length(input_paths) && !length(output_paths)) { stop("No inputs/outputs provided. 
Use --inputs and/or --outputs .") } - + merge_dimsum_counts(input_paths, output_paths, out_path) } diff --git a/modules/local/dmsanalysis/bin/possible_mutations.R b/modules/local/dmsanalysis/bin/possible_mutations.R index 1206d58..97ce5fa 100644 --- a/modules/local/dmsanalysis/bin/possible_mutations.R +++ b/modules/local/dmsanalysis/bin/possible_mutations.R @@ -98,8 +98,3 @@ generate_possible_variants <- function(wt_seq_input, start_stop_pos, mutagenesis # Possibly generate a custom codons file: "AAA, AAC, AAG, AAT, ..." # generate_possible_variants("/Users/benjaminwehnert/CRG/DMS_QC/testing_data/MORtn5_reference.fa", "23-1225", "/Users/benjaminwehnert/CRG/DMS_QC/testing_data/possible_NNK_mutations.csv", mutagenesis_type = "nnk") ### this one's correct for the dataset # generate_possible_variants("/Users/benjaminwehnert/CRG/DMS_QC/testing_data/MORtn5_reference.fa", "23-1225", "/Users/benjaminwehnert/CRG/DMS_QC/testing_data/testing_outputs/possible_NNK_mutations_taylors_nnk_and_nns.csv", mutagenesis_type = "max_diff_to_wt") - - - - - diff --git a/modules/local/dmsanalysis/bin/process_raw_gatk.R b/modules/local/dmsanalysis/bin/process_raw_gatk.R index 292fa6c..d185ad7 100644 --- a/modules/local/dmsanalysis/bin/process_raw_gatk.R +++ b/modules/local/dmsanalysis/bin/process_raw_gatk.R @@ -42,5 +42,3 @@ process_raw_gatk <- function(gatk_file_path, output_csv_path) { # Example usage (can be used for testing): # process_raw_gatk("/path/to/gatk_file.txt", "/path/to/output_file.csv") #process_raw_gatk("/Users/benjaminwehnert/CRG/DMS_QC/testing_data/output_premerged_vsearch.variantCounts", "/Users/benjaminwehnert/CRG/DMS_QC/testing_data/raw_gatk.csv") - - diff --git a/modules/local/dmsanalysis/possiblemutations.nf b/modules/local/dmsanalysis/possiblemutations.nf index 344eb0b..c2a3361 100644 --- a/modules/local/dmsanalysis/possiblemutations.nf +++ b/modules/local/dmsanalysis/possiblemutations.nf @@ -4,8 +4,8 @@ process DMSANALYSIS_POSSIBLE_MUTATIONS { conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' - ? 'community.wave.seqera.io/library/bioconductor-biostrings_r-base_r-biocmanager_r-dplyr_pruned:ce2ba7ad7f6e7f2c' + container "${ workflow.containerEngine == 'singularity' + ? 'community.wave.seqera.io/library/bioconductor-biostrings_r-base_r-biocmanager_r-dplyr_pruned:ce2ba7ad7f6e7f2c' : 'community.wave.seqera.io/library/bioconductor-biostrings_r-base_r-biocmanager_r-dplyr_pruned:0fd2e39a5bf2ecaa' }" input: @@ -39,7 +39,7 @@ process DMSANALYSIS_POSSIBLE_MUTATIONS { DMSANALYSIS_POSSIBLE_MUTATIONS: r-base: \$R_VERSION biostrings: \$BIOSTRINGS_VERSION - END_VERSIONS + END_VERSIONS """ stub: diff --git a/modules/local/dmsanalysis/processgatk.nf b/modules/local/dmsanalysis/processgatk.nf index 50bc4e0..15bf3cc 100644 --- a/modules/local/dmsanalysis/processgatk.nf +++ b/modules/local/dmsanalysis/processgatk.nf @@ -3,8 +3,8 @@ process DMSANALYSIS_PROCESS_GATK { label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' - ? 'community.wave.seqera.io/library/bioconductor-biostrings_r-base_r-biocmanager_r-dplyr_pruned:ce2ba7ad7f6e7f2c' + container "${ workflow.containerEngine == 'singularity' + ? 
'community.wave.seqera.io/library/bioconductor-biostrings_r-base_r-biocmanager_r-dplyr_pruned:ce2ba7ad7f6e7f2c' : 'community.wave.seqera.io/library/bioconductor-biostrings_r-base_r-biocmanager_r-dplyr_pruned:0fd2e39a5bf2ecaa' }" publishDir "${params.outdir}/intermediate_files", mode: 'copy' diff --git a/modules/local/fitness/fitness_experimental_design.nf b/modules/local/fitness/fitness_experimental_design.nf index e6ec6a2..83ba0b3 100644 --- a/modules/local/fitness/fitness_experimental_design.nf +++ b/modules/local/fitness/fitness_experimental_design.nf @@ -27,4 +27,4 @@ process EXPDESIGN_FITNESS { r-base: \$R_VERSION END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/local/fitness/fitness_standard.nf b/modules/local/fitness/fitness_standard.nf index 4606077..ace8b4f 100644 --- a/modules/local/fitness/fitness_standard.nf +++ b/modules/local/fitness/fitness_standard.nf @@ -3,8 +3,8 @@ process FITNESS_CALCULATION { label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' - ? 'community.wave.seqera.io/library/bioconductor-biostrings_r-base_r-biocmanager_r-dplyr_pruned:ce2ba7ad7f6e7f2c' + container "${ workflow.containerEngine == 'singularity' + ? 'community.wave.seqera.io/library/bioconductor-biostrings_r-base_r-biocmanager_r-dplyr_pruned:ce2ba7ad7f6e7f2c' : 'community.wave.seqera.io/library/bioconductor-biostrings_r-base_r-biocmanager_r-dplyr_pruned:0fd2e39a5bf2ecaa' }" input: @@ -22,7 +22,7 @@ process FITNESS_CALCULATION { set -euo pipefail R_version=\$(R --version | head -n 1 | sed 's/^R version //') - + Rscript -e "source('$script'); run_fitness_estimation('$counts_merged', '$exp_design', '$syn_wt_txt', 'fitness_estimation.tsv')" cat > versions.yml < [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } - def rename_to = old_new_pairs*.join(' ').join(' ') + def rename_to = old_new_pairs*.join(' ').join(' ') def renamed_files = old_new_pairs.collect{ _old_name, new_name -> new_name }.join(' ') // The total amount of RAM allocated by FastQC is equal to the number of threads defined (--threads) times the amount of RAM defined (--memory) // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 // Dividing task.memory by task.cpus keeps the total within the amount of RAM requested in the label - def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') / task.cpus + def memory_in_mb = task.memory ? task.memory.toUnit('MB') / task.cpus : null // FastQC memory value allowed range (100 - 10000) def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb) """ - printf "%s %s\\n" $rename_to | while read old_name new_name; do + printf "%s %s\\n" ${rename_to} | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name done fastqc \\ - $args \\ - --threads $task.cpus \\ - --memory $fastqc_memory \\ - $renamed_files + ${args} \\ + --threads ${task.cpus} \\ + --memory ${fastqc_memory} \\ + ${renamed_files} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 2b2e62b..c8d9d02 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -29,9 +29,10 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively.
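A side note on the memory logic in the fastqc module hunk above: the per-thread value is the task's total memory divided by task.cpus, clamped into FastQC's allowed window of 100 to 10000 MB. A hedged R sketch of that arithmetic (the function name and the figures are invented for illustration):

# Illustrative only; mirrors the clamp in the fastqc module diff above.
fastqc_memory_mb <- function(task_memory_mb, cpus) {
  per_thread <- task_memory_mb / cpus  # FastQC allocates threads * --memory in total
  min(max(per_thread, 100), 10000)     # clamp into FastQC's 100-10000 MB range
}
fastqc_memory_mb(36864, 6)   # 6144  -> used as-is
fastqc_memory_mb(512, 8)     # 64    -> raised to the 100 MB floor
fastqc_memory_mb(131072, 2)  # 65536 -> capped at the 10000 MB ceiling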
+ ontologies: [] output: - - html: - - meta: + html: + - - meta: type: map description: | Groovy Map containing sample information @@ -40,8 +41,9 @@ output: type: file description: FastQC report pattern: "*_{fastqc.html}" - - zip: - - meta: + ontologies: [] + zip: + - - meta: type: map description: | Groovy Map containing sample information @@ -50,11 +52,14 @@ output: type: file description: FastQC report archive pattern: "*_{fastqc.zip}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@drpatelh" - "@grst" diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml deleted file mode 100644 index 7834294..0000000 --- a/modules/nf-core/fastqc/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -fastqc: - - modules/nf-core/fastqc/** diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index 6f5b867..dd513cb 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,5 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::multiqc=1.25.1 + - bioconda::multiqc=1.31 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index cc0643e..5288f5c 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,8 +3,8 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.25.1--pyhdfd78af_0' : - 'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ef/eff0eafe78d5f3b65a6639265a16b89fdca88d06d18894f90fcdb50142004329/data' : + 'community.wave.seqera.io/library/multiqc:1.31--1efbafd542a23882' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index b16c187..ce30eb7 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -15,57 +15,71 @@ tools: licence: ["GPL-3.0-or-later"] identifier: biotools:multiqc input: - - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - - - multiqc_config: - type: file - description: Optional config yml for MultiQC - pattern: "*.{yml,yaml}" - - - extra_multiqc_config: - type: file - description: Second optional config yml for MultiQC. Will override common sections - in multiqc_config. - pattern: "*.{yml,yaml}" - - - multiqc_logo: + - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + ontologies: [] + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections + in multiqc_config. 
+ pattern: "*.{yml,yaml}" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + ontologies: [] + - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. + pattern: "*.{tsv}" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV +output: + report: + - "*multiqc_report.html": type: file - description: Optional logo file for MultiQC - pattern: "*.{png}" - - - replace_names: + description: MultiQC report file + pattern: "multiqc_report.html" + ontologies: [] + data: + - "*_data": + type: directory + description: MultiQC data dir + pattern: "multiqc_data" + plots: + - "*_plots": type: file - description: | - Optional two-column sample renaming file. First column a set of - patterns, second column a set of corresponding replacements. Passed via - MultiQC's `--replace-names` option. - pattern: "*.{tsv}" - - - sample_names: + description: Plots created by MultiQC + pattern: "*_data" + ontologies: [] + versions: + - versions.yml: type: file - description: | - Optional TSV file with headers, passed to the MultiQC --sample_names - argument. - pattern: "*.{tsv}" -output: - - report: - - "*multiqc_report.html": - type: file - description: MultiQC report file - pattern: "multiqc_report.html" - - data: - - "*_data": - type: directory - description: MultiQC data dir - pattern: "multiqc_data" - - plots: - - "*_plots": - type: file - description: Plots created by MultiQC - pattern: "*_data" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@abhi18av" - "@bunop" diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index 2fcbb5f..17881d1 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" + "versions.yml:md5,8968b114a3e20756d8af2b80713bcc4f" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "25.04.6" }, - "timestamp": "2024-10-02T17:51:46.317523" + "timestamp": "2025-09-08T20:57:36.139055243" }, "multiqc_stub": { "content": [ @@ -17,25 +17,25 @@ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" + "versions.yml:md5,8968b114a3e20756d8af2b80713bcc4f" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "25.04.6" }, - "timestamp": "2024-10-02T17:52:20.680978" + "timestamp": "2025-09-08T20:59:15.142230631" }, "multiqc_versions_config": { "content": [ [ - "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" + "versions.yml:md5,8968b114a3e20756d8af2b80713bcc4f" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "25.04.6" }, - "timestamp": "2024-10-02T17:52:09.185842" + 
"timestamp": "2025-09-08T20:58:29.629087066" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml deleted file mode 100644 index bea6c0d..0000000 --- a/modules/nf-core/multiqc/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -multiqc: - - modules/nf-core/multiqc/** diff --git a/nextflow.config b/nextflow.config index 9c081f3..337f725 100644 --- a/nextflow.config +++ b/nextflow.config @@ -40,13 +40,15 @@ params { email_on_fail = null plaintext_email = false monochrome_logs = false - hook_url = null + hook_url = System.getenv('HOOK_URL') help = false help_full = false show_hidden = false version = false pipelines_testdata_base_path = 'https://raw.githubusercontent.com/BenjaminWehnert1008/test-datasets/dmsqc/dmsqc/' - trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')// Config options + trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + + // Config options config_profile_name = null config_profile_description = null @@ -99,8 +101,19 @@ profiles { apptainer.enabled = false process.containerOptions = '-u $(id -u):$(id -g)' } - arm { - process.containerOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + arm64 { + process.arch = 'arm64' + // TODO https://github.com/nf-core/modules/issues/6694 + // For now if you're using arm64 you have to use wave for the sake of the maintainers + // wave profile + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' + } + emulate_amd64 { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true @@ -156,17 +169,10 @@ profiles { wave.freeze = true wave.strategy = 'conda,container' } - gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB - process { - resourceLimits = [ - memory: 8.GB, - cpus : 4, - time : 1.h - ] - } + gpu { + docker.runOptions = '-u $(id -u):$(id -g) --gpus all' + apptainer.runOptions = '--nv' + singularity.runOptions = '--nv' } local { process { @@ -223,12 +229,9 @@ profiles { test_full { includeConfig 'conf/test_full.config' } } -// Load nf-core custom profiles from different Institutions -includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" - +// If params.custom_config_base is set AND either the NXF_OFFLINE environment variable is not set or params.custom_config_base is a local path, the nfcore_custom.config file from the specified base path is included. // Load nf-core/deepmutscan custom profiles from different institutions. -// TODO nf-core: Optionally, you can add a pipeline-specific nf-core config at https://github.com/nf-core/configs -// includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/deepmutscan.config" : "/dev/null" +includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled @@ -301,46 +304,19 @@ manifest { description = """Until now, most Deep Mutational Scanning (DMS) experiments relied on variant-specific barcoded libraries for sequencing. 
This method enabled DMS on large proteins and led to many great publications. Recently, efforts have increased to make use of the classic and simpler random fragmentation-based short-read sequencing (“shotgun sequencing”). This approach saves time and money and, owing to its simpler experimental design, is less prone to mistakes. nf-core/deepmutscan handles the essential computational steps, processing the raw FASTQ files and generating a count table of variants. Along the way, it provides multiple QC metrics, enabling users to quickly evaluate the success of their experimental setup.""" mainScript = 'main.nf' defaultBranch = 'master' - nextflowVersion = '!>=24.04.2' + nextflowVersion = '!>=25.04.0' version = '1.0.0' doi = '' } // Nextflow plugins plugins { - id 'nf-schema@2.3.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.5.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet } validation { defaultIgnoreParams = ["genomes"] monochromeLogs = params.monochrome_logs - help { - enabled = true - command = "nextflow run nf-core/deepmutscan -profile --input samplesheet.csv --outdir " - fullParameter = "help_full" - showHiddenParameter = "show_hidden" - beforeText = """ --\033[2m----------------------------------------------------\033[0m- \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m -\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m -\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m -\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m - \033[0;32m`._,._,\'\033[0m -\033[0;35m nf-core/deepmutscan ${manifest.version}\033[0m --\033[2m----------------------------------------------------\033[0m- """ - afterText = """${manifest.doi ? "\n* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} -* The nf-core framework - https://doi.org/10.1038/s41587-020-0439-x - -* Software dependencies - https://github.com/nf-core/deepmutscan/blob/master/CITATIONS.md -""" - } - summary { - beforeText = validation.help.beforeText - afterText = validation.help.afterText - } } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index b71860f..16837e1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -260,6 +260,18 @@ "fa_icon": "far calendar", "description": "Suffix to add to the trace report filename. Default is the date and time in the format yyyy-MM-dd_HH-mm-ss.", "hidden": true + }, + "help": { + "type": ["boolean", "string"], + "description": "Display the help message." + }, + "help_full": { + "type": "boolean", + "description": "Display the full detailed help message." + }, + "show_hidden": { + "type": "boolean", + "description": "Display hidden parameters in the help message (only works when --help or --help_full are provided)." } } } diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 0000000..3a1fff5 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,24 @@ +config { + // location for all nf-test tests + testsDir "."
+ + // nf-test directory including temporary files for each test + workDir System.getenv("NFT_WORKDIR") ?: ".nf-test" + + // location of an optional nextflow.config file specific for executing tests + configFile "tests/nextflow.config" + + // ignore tests coming from the nf-core/modules repo + ignore 'modules/nf-core/**/tests/*', 'subworkflows/nf-core/**/tests/*' + + // run all tests with the profile(s) defined in the main nextflow.config + profile "test" + + // list of filenames or patterns that should trigger a full test run + triggers 'nextflow.config', 'nf-test.config', 'conf/test.config', 'tests/nextflow.config', 'tests/.nftignore' + + // load the necessary plugins + plugins { + load "nft-utils@0.0.3" + } +} diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index afccb89..ea70212 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "Stable", "datePublished": "2025-11-10T09:04:57+00:00", - "description": "

\n \n \n \"nf-core/deepmutscan\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/deepmutscan/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/deepmutscan/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/deepmutscan/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/deepmutscan/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/deepmutscan/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/deepmutscan)\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23deepmutscan-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/deepmutscan)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n# 1. Overview\n**nf-core/deepmutscan** is a reproducible, scalable, and community-curated pipeline for analyzing deep mutational scanning (DMS) data using shotgun DNA sequencing. DMS enables researchers to measure the fitness effects of thousands of gene variants simultaneously, helping to classify disease causing mutants in human and animal populations, to learn fundamental rules of virus evolution, protein architecture, splicing or small-molecule interactions.\n\nWhile DNA synthesis and sequencing technologies have advanced substantially, long open reading frame (ORF) targets still present major challenges for DMS studies. Shotgun DNA sequencing can be used to greatly speed up the inference of long ORF mutant fitness landscapes, theoretically at no expense in accuracy. We have designed the **nf-core/deepmutscan** pipeline to unlock the power of shotgun sequencing based DMS studies on long ORFs, to simplify and standardise the complex bioinformatics steps involved in data processing of such experiments \u2013 from read alignment to QC reporting and fitness landscape inferences.\n\n> \ud83d\udcc4 Reference: Wehnert et al., _bioRxiv_ preprint (coming soon)\n\n---\n\n# 2. 
Features of nf-core/deepmutscan\n- End-to-end analyses of DMS shotgun sequencing data\n- Modular, three-stage workflow: alignment \u2192 QC \u2192 error-aware fitness estimation\n- Integrates with popular statistical tools like [DiMSum](https://github.com/lehner-lab/DiMSum), [Enrich2](https://github.com/FowlerLab/Enrich2), [rosace](https://github.com/pimentellab/rosace/) and [mutscan](https://github.com/fmicompbio/mutscan)\n- Supports multiple mutagenesis strategies, e.g. nicking by NNK and NNS codons\n- Containerized via Docker, Singularity and Apptainer\n- Scalable across HPC and Cloud systems\n- Monitors CPU, memory, and CO\u2082 usage\n\nFor details of the pipeline and potential future expansions, please consider reading our [detailed description](docs/pipeline_steps.md).\n\n---\n\n# 3. Installation\n**nf-core/deepmutscan** uses [Nextflow](https://nf-co.re/docs/usage/getting_started/installation), which must be installed on your system:\n\n```bash\njava -version # Check that Java v11+ is installed\ncurl -s https://get.nextflow.io | bash # Download Nextflow\nchmod +x nextflow # Make executable\nmv nextflow ~/bin/ # Add to user's $PATH\n```\n\nThe pipeline itself requires no installation \u2013 Nextflow will fetch it directly from GitHub:\n\n```bash\nnextflow run nf-core/deepmutscan -profile docker\n```\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/deepmutscan/usage) and the [parameter documentation](https://nf-co.re/deepmutscan/parameters).\n\n---\n\n# 4. Usage\nPrepare:\n- A **sample sheet** CSV to specify input/output labels, replicates, etc. (see [example](assets/samplesheet.csv))\n- A **reference FASTA** file for the gene or region of interest\n\nTo execute **nf-core/deepmutscan**, run the basic command:\n\n```bash\nnextflow run nf-core/deepmutscan \\\n -profile singularity,local \\\n --input ./input.csv \\\n --reading_frame 1-300 \\\n --fasta ./ref.fa \\\n --mutagenesis max_diff_to_wt \\\n --run_seqdepth false \\\n --fitness true \\\n --outdir ./results\n```\n\n### Required parameters\n\n| Parameter | Description |\n|--------------------|-----------------------------------------------------|\n| `--input` | Path to sample sheet CSV |\n| `--outdir` | Path to output directory |\n| `--fasta` | Reference FASTA file |\n| `--reading_frame` | Start and end nucleotide (e.g. `1-300`) |\n\n### Optional parameters *(in development)*\n\n| Parameter | Default | Description |\n|------------------------|-------------|-------------------------------------------------|\n| `--run_seqdepth` | `false` | Estimate sequencing saturation by rarefaction |\n| `--fitness` | `false` | Default fitness inference module |\n| `--dimsum` | `false` | Optional fitness inference module *(AMD/x86_64 systems only)* |\n| `--mutagenesis` | `max_diff_to_wt` | Deep mutational scanning strategy used *(in development)* |\n| `--error-estimation` | `wt_sequencing` | Error model used to correct 1nt counts *(in development)* |\n| `--read-align` | `bwa-mem` | Read aligner *(in development)* |\n\nMore options and advanced configuration: [see vignette](link). For further information or help, don't hesitate to get in touch on the [Slack `#deepmutscan` channel](https://nfcore.slack.com/channels/deepmutscan) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n---\n\n# 5. 
Input Data\n\nThe primary pipeline input is a sample sheet `.csv` file listing:\n\n- Paths to paired-end `.fastq.gz` files from shotgun sequencing\n- Their classification as either input or output samples\n- Replicate IDs\n- Associated experimental metadata\n\nSee [sample CSV](assets/samplesheet.csv) for formatting.\n\n---\n\n# 6. Outputs\n\nAfter execution, the pipeline creates the following directory structure:\n\n```\nresults/\n\u251c\u2500\u2500 fastqc/ # Individual HTML reports for specified fastq files, raw sequencing QC\n\u251c\u2500\u2500 fitness/ # Merged variant count tables, fitness and error estimates, replicate correlations and heatmaps\n\u251c\u2500\u2500 intermediate_files/ # Raw alignments, raw and pre-filtered variant count tables, QC reports\n\u251c\u2500\u2500 library_QC/ # Sample-specific PDF visualizations: position-wise sequencing coverage, count heatmaps, etc.\n\u251c\u2500\u2500 multiqc/ # Shared HTML reports for all fastq files, raw sequencing QC\n\u251c\u2500\u2500 pipelineinfo/ # Nextflow helper files for timeline and summary report generation\n\u251c\u2500\u2500 timeline.html # Nextflow timeline for all tasks\n\u2514\u2500\u2500 report.html # Nextflow summary report incl. detailed CPU and memory usage per for all tasks\n```\n\n---\n\n# 7. Citation\n\nIf you use this pipeline in your research, please cite:\n> \ud83d\udcc4 Wehnert et al., _bioRxiv_ preprint (coming soon)\n\nPlease also cite the nf-core framework:\n> \ud83d\udcc4 Ewels et al., _Nature Biotechnology_, 2020 \n> [https://doi.org/10.1038/s41587-020-0439-x](https://doi.org/10.1038/s41587-020-0439-x)\n\n---\n\n# 8. License\n\n[MIT License](link)\n\n© 2025 Benjamin Wehnert, Taylor Mighell, Fei Sang, Ben Lehner, Maximilian Stammnitz\n\n---\n\n# 9. Contributing\n\nWe welcome contributions from the community!\n\nPlease open an [issue](../../issues/new) or [pull request](../../compare) via this GitHub page, to:\n- Suggest or help implementing new modules for custom workflows\n- Report bugs and other challenges in running **nf-core/deepmutscan**\n- Help improve this documentation\n\nYou can also reach out to us via the **nf-core Slack**, by use of the `#dms` channel ([join here](https://join.slack.com/share/enQtOTMyMDc3MTA0Mzg0Mi04YmRiNDEwZTBlOTRiN2M2ZGU5ZGVmOWQ3YzA0YjA4NzhiNjFhNTVlNDA4ZTZjOTE2MjE5MmIzYWZjZTljMTE3)).\n\n---\n\n# 10. Contact\n\nFor detailled scientific or technical questions, feedback and experimental discussions, feel free to contact us directly:\n\n- Benjamin Wehnert \u2014 wehnertbenjamin@gmail.com \n- Maximilian Stammnitz \u2014 maximilian.stammnitz@crg.eu\n\n---\n", + "description": "

\n \n \n \"nf-core/deepmutscan\"\n \n

\n\n[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/codespaces/new/nf-core/deepmutscan)\n[![GitHub Actions CI Status](https://github.com/nf-core/deepmutscan/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/deepmutscan/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/deepmutscan/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/deepmutscan/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/deepmutscan/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.4.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.4.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/deepmutscan)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23deepmutscan-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/deepmutscan)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/deepmutscan** is a workflow designed for the analysis of deep mutational scanning (DMS) data. DMS enables researchers to experimentally measure the fitness effects of thousands of gene variants simultaneously, helping to classify disease-causing mutants in human and animal populations and to uncover fundamental rules of virus evolution, protein architecture, splicing, small-molecule interactions and many other phenotypes.\n\nWhile DNA synthesis and sequencing technologies have advanced substantially, long open reading frame (ORF) targets still present major challenges for DMS studies. Shotgun DNA sequencing can greatly speed up the inference of mutant fitness landscapes for long ORFs, in theory at no cost in accuracy. 
We have designed the `nf-core/deepmutscan` pipeline to unlock the power of shotgun-sequencing-based DMS studies on long ORFs and to simplify and standardise the complex bioinformatics steps involved in processing data from such experiments \u2013 from read alignment to QC reporting and fitness landscape inference.\n\n


The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers, making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process, which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/deepmutscan/results).\n\n## Major features\n\n- End-to-end analyses of various DMS data\n- Modular, three-stage workflow: alignment \u2192 QC \u2192 error-aware fitness estimation\n- Integration with popular statistical fitness estimation tools like [DiMSum](https://github.com/lehner-lab/DiMSum), [Enrich2](https://github.com/FowlerLab/Enrich2), [rosace](https://github.com/pimentellab/rosace/) and [mutscan](https://github.com/fmicompbio/mutscan)\n- Support of multiple mutagenesis strategies, e.g. by nicking with degenerate NNK and NNS codons\n- Containerisation via Docker, Singularity and Apptainer\n- Scalability across HPC and Cloud systems\n- Monitoring of CPU, memory, and CO\u2082 usage\n\nFor more details on the pipeline and on potential future expansions, please consider reading our [usage description](https://nf-co.re/deepmutscan/usage).\n\n## Step-by-step pipeline summary\n\nThe pipeline processes deep mutational scanning (DMS) sequencing data in several stages:\n\n1. Alignment of reads to the reference open reading frame (ORF) (`BWA-mem`)\n2. Filtering of wildtype and erroneous reads (`samtools view`)\n3. Read merging for base error reduction (`vsearch merge`, `BWA-mem`)\n4. Mutation counting (`GATK AnalyzeSaturationMutagenesis`)\n5. DMS library quality control\n6. Data summarisation across samples\n7. Single nucleotide variant error correction _(in development)_\n8. Fitness estimation _(in development)_\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input/output data in which each row represents a pair of fastq files (paired-end). This should look as follows:\n\n```csv title=\"samplesheet.csv\"\nsample,type,replicate,file1,file2\nORF1,input,1,/reads/forward1.fastq.gz,/reads/reverse1.fastq.gz\nORF1,input,2,/reads/forward2.fastq.gz,/reads/reverse2.fastq.gz\nORF1,output,1,/reads/forward3.fastq.gz,/reads/reverse3.fastq.gz\nORF1,output,2,/reads/forward4.fastq.gz,/reads/reverse4.fastq.gz\n```\n\nSecond, specify the gene or gene region of interest using a reference FASTA file via `--fasta`, and provide the exact codon coordinates using `--reading_frame`.\n\n
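For instance, assuming a hypothetical reference in which the ORF begins at the very first base (name and sequence are placeholders):\n\n```\n>ref\nATGGCTGAA...AAATGA\n```\n\nIf this ORF is 100 codons long, `--reading_frame 1-300` marks nucleotides 1\u2013300 as the coding sequence, since each codon spans three nucleotides (100 codons \u00d7 3 nt = 300 nt).\n\n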
Now, you can run the pipeline using:\n\n```bash title=\"example pipeline run\"\nnextflow run nf-core/deepmutscan \\\n -profile <docker/singularity/.../institute> \\\n --input ./samplesheet.csv \\\n --fasta ./ref.fa \\\n --reading_frame 1-300 \\\n --outdir ./results\n```\n\n## Pipeline output\n\nTo see the results of an example test run with a full-size dataset, refer to the [results](https://nf-co.re/deepmutscan/results) tab on the nf-core website pipeline page.\n\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/deepmutscan/output).\n\n## Contributing\n\nWe welcome contributions from the community!\n\nFor technical challenges and feedback on the pipeline, please use our [GitHub repository](https://github.com/nf-core/deepmutscan). Please open an [issue](https://github.com/nf-core/deepmutscan/issues/new) or [pull request](https://github.com/nf-core/deepmutscan/compare) to:\n\n- Report bugs or solve data incompatibilities when running `nf-core/deepmutscan`\n- Suggest the implementation of new modules for custom DMS workflows\n- Help improve this documentation\n\nIf you are interested in getting involved as a developer, please consider joining our interactive [`#deepmutscan` Slack channel](https://nfcore.slack.com/channels/deepmutscan) (via [this invite](https://nf-co.re/join/slack)).\n\n## Credits\n\nnf-core/deepmutscan was originally written by [Benjamin Wehnert](https://github.com/BenjaminWehnert1008) and [Max Stammnitz](https://github.com/MaximilianStammnitz) at the [Centre for Genomic Regulation, Barcelona](https://www.crg.eu/), with the generous support of an EMBO Long-term Postdoctoral Fellowship and a Marie Sk\u0142odowska-Curie grant from the European Union.\n\nIf you use `nf-core/deepmutscan` in your analyses, please cite:\n\n> \ud83d\udcc4 Wehnert et al., _bioRxiv_ preprint (coming soon)\n\nPlease also cite the `nf-core` framework:\n\n> \ud83d\udcc4 Ewels et al., _Nature Biotechnology_, 2020\n> [https://doi.org/10.1038/s41587-020-0439-x](https://doi.org/10.1038/s41587-020-0439-x)\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#deepmutscan` channel](https://nfcore.slack.com/channels/deepmutscan) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Scientific contact\n\nFor scientific discussions around the use of this pipeline (e.g. 
on experimental design or sequencing data requirements), please feel free to get in touch with us directly:\n\n- Benjamin Wehnert \u2014 wehnertbenjamin@gmail.com\n- Maximilian Stammnitz \u2014 maximilian.stammnitz@crg.eu\n", "hasPart": [ { "@id": "main.nf" diff --git a/subworkflows/local/utils_nfcore_deepmutscan_pipeline/main.nf b/subworkflows/local/utils_nfcore_deepmutscan_pipeline/main.nf index fca70b3..2368e89 100644 --- a/subworkflows/local/utils_nfcore_deepmutscan_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_deepmutscan_pipeline/main.nf @@ -11,6 +11,7 @@ include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' include { paramsSummaryMap } from 'plugin/nf-schema' include { samplesheetToList } from 'plugin/nf-schema' +include { paramsHelp } from 'plugin/nf-schema' include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' include { imNotification } from '../../nf-core/utils_nfcore_pipeline' @@ -32,6 +33,9 @@ workflow PIPELINE_INITIALISATION { nextflow_cli_args // array: List of positional nextflow CLI args outdir // string: The output directory where the results will be saved input // string: Path to input samplesheet + help // boolean: Display help message and exit + help_full // boolean: Show the full help message + show_hidden // boolean: Show hidden parameters in the help message main: @@ -50,10 +54,35 @@ workflow PIPELINE_INITIALISATION { // // Validate parameters and generate parameter summary to stdout // + before_text = """ +-\033[2m----------------------------------------------------\033[0m- + \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m +\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m +\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m +\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m + \033[0;32m`._,._,\'\033[0m +\033[0;35m nf-core/deepmutscan ${workflow.manifest.version}\033[0m +-\033[2m----------------------------------------------------\033[0m- +""" + after_text = """${workflow.manifest.doi ? "\n* The pipeline\n" : ""}${workflow.manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${workflow.manifest.doi ? 
"\n" : ""} +* The nf-core framework + https://doi.org/10.1038/s41587-020-0439-x + +* Software dependencies + https://github.com/nf-core/deepmutscan/blob/master/CITATIONS.md +""" + command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + UTILS_NFSCHEMA_PLUGIN ( workflow, validate_params, - null + null, + help, + help_full, + show_hidden, + before_text, + after_text, + command ) // @@ -275,4 +304,3 @@ def methodsDescriptionText(mqc_methods_yaml) { return description_html.toString() } - diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml deleted file mode 100644 index f847611..0000000 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -subworkflows/utils_nextflow_pipeline: - - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml deleted file mode 100644 index ac8523c..0000000 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -subworkflows/utils_nfcore_pipeline: - - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfschema_plugin/main.nf b/subworkflows/nf-core/utils_nfschema_plugin/main.nf index 4994303..ee4738c 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/main.nf +++ b/subworkflows/nf-core/utils_nfschema_plugin/main.nf @@ -4,6 +4,7 @@ include { paramsSummaryLog } from 'plugin/nf-schema' include { validateParameters } from 'plugin/nf-schema' +include { paramsHelp } from 'plugin/nf-schema' workflow UTILS_NFSCHEMA_PLUGIN { @@ -15,29 +16,56 @@ workflow UTILS_NFSCHEMA_PLUGIN { // when this input is empty it will automatically use the configured schema or // "${projectDir}/nextflow_schema.json" as default. This input should not be empty // for meta pipelines + help // boolean: show help message + help_full // boolean: show full help message + show_hidden // boolean: show hidden parameters in help message + before_text // string: text to show before the help message and parameters summary + after_text // string: text to show after the help message and parameters summary + command // string: an example command of the pipeline main: + if(help || help_full) { + help_options = [ + beforeText: before_text, + afterText: after_text, + command: command, + showHidden: show_hidden, + fullHelp: help_full, + ] + if(parameters_schema) { + help_options << [parametersSchema: parameters_schema] + } + log.info paramsHelp( + help_options, + params.help instanceof String ? params.help : "", + ) + exit 0 + } + // // Print parameter summary to stdout. 
// // Print parameter summary to stdout. This will display the parameters // that differ from the default given in the JSON schema // + + summary_options = [:] if(parameters_schema) { - log.info paramsSummaryLog(input_workflow, parameters_schema:parameters_schema) - } else { - log.info paramsSummaryLog(input_workflow) + summary_options << [parametersSchema: parameters_schema] } + log.info before_text + log.info paramsSummaryLog(summary_options, input_workflow) + log.info after_text // // Validate the parameters using nextflow_schema.json or the schema // given via the validation.parametersSchema configuration option // if(validate_params) { + validateOptions = [:] if(parameters_schema) { - validateParameters(parameters_schema:parameters_schema) - } else { - validateParameters() + validateOptions << [parametersSchema: parameters_schema] } + validateParameters(validateOptions) } emit: diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test index 8fb3016..c977917 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test @@ -25,6 +25,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = "" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -51,6 +57,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = "" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -77,6 +89,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -103,6 +121,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -114,4 +138,36 @@ nextflow_workflow { ) } } + + test("Should create a help message") { + + when { + + params { + test_data = '' + outdir = null + } + + workflow { + """ + validate_params = true + input[0] = workflow + input[1] = validate_params + input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + input[3] = true + input[4] = false + input[5] = false + input[6] = "Before" + input[7] = "After" + input[8] = "nextflow run test/test" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } } diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config index 0907ac5..8d8c737 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config @@ -1,8 +1,8 @@ plugins { - id "nf-schema@2.1.0" + id "nf-schema@2.5.1" } validation { parametersSchema = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" monochromeLogs = true -} \ No newline at end of file +} diff --git a/tests/.nftignore b/tests/.nftignore new file mode 100644 index 0000000..e128a12 --- /dev/null +++ b/tests/.nftignore @@ -0,0 +1,12 @@ +.DS_Store +multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt 
+multiqc/multiqc_data/multiqc.parquet +multiqc/multiqc_data/multiqc.log +multiqc/multiqc_data/multiqc_data.json +multiqc/multiqc_data/multiqc_sources.txt +multiqc/multiqc_data/multiqc_software_versions.txt +multiqc/multiqc_data/llms-full.txt +multiqc/multiqc_plots/{svg,pdf,png}/*.{svg,pdf,png} +multiqc/multiqc_report.html +fastqc/*_fastqc.{html,zip} +pipeline_info/*.{html,json,txt,yml} diff --git a/tests/default.nf.test b/tests/default.nf.test new file mode 100644 index 0000000..efb3834 --- /dev/null +++ b/tests/default.nf.test @@ -0,0 +1,33 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + tag "pipeline" + + test("-profile test") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success }, + { assert snapshot( + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_deepmutscan_software_mqc_versions.yml"), + // All stable path names, with relative paths + stable_name, + // All files with stable contents + stable_path ).match() } ) } } } diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 0000000..662ccd0 --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,14 @@ +/* +======================================================================================== + Nextflow config file for running nf-test tests +======================================================================================== +*/ + +// TODO nf-core: Specify any additional parameters here +// Or any resource requirements +params { + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/deepmutscan' } + +aws.client.anonymous = true // fixes S3 access issues on self-hosted runners diff --git a/workflows/deepmutscan.nf b/workflows/deepmutscan.nf index 907e93d..009190d 100644 --- a/workflows/deepmutscan.nf +++ b/workflows/deepmutscan.nf @@ -298,7 +298,7 @@ workflow DEEPMUTSCAN { ch_filter_lib_script, // path(R script) -- N ch_complete_script, // path(R script) -- N ch_prepare_heatmap_script // path(R script) -- N - ) + ) annotated_variantCounts_ch = DMSANALYSIS_PROCESS_GATK.out.processed_variantCounts.map { meta, a, b, c, d -> tuple(meta, a) } variantCounts_filtered_by_library_ch = DMSANALYSIS_PROCESS_GATK.out.processed_variantCounts.map { meta, a, b, c, d -> tuple(meta, b) } @@ -419,7 +419,7 @@ logdiff_scriptN = fanoutTo(library_completed_variantCounts_ch, lo ch_merge_script_for_each // path merge_script (broadcast) ) } - + // Create experimental design file to use for DiMSum if (params.fitness) { EXPDESIGN_FITNESS(