diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4ecfbfe3..b290e090 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -10,15 +10,7 @@ "vscode": { // Set *default* container specific settings.json values on container create. "settings": { - "python.defaultInterpreterPath": "/opt/conda/bin/python", - "python.linting.enabled": true, - "python.linting.pylintEnabled": true, - "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", - "python.formatting.yapfPath": "/opt/conda/bin/yapf", - "python.linting.flake8Path": "/opt/conda/bin/flake8", - "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", - "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.defaultInterpreterPath": "/opt/conda/bin/python" }, // Add the IDs of extensions you want installed when the container is created. diff --git a/.editorconfig b/.editorconfig index b6b31907..dd9ffa53 100644 --- a/.editorconfig +++ b/.editorconfig @@ -18,7 +18,20 @@ end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset -indent_size = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset [/assets/email*] indent_size = unset + +# ignore Readme +[README.md] +indent_style = unset + +# ignore python +[*.{py,md}] +indent_style = unset diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index c71d079f..f5305291 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,9 +9,8 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! 
Contributions to the code are even more welcome ;) -:::info -If you need help using or modifying nf-core/differentialabundance then the best place to ask is on the nf-core Slack [#differentialabundance](https://nfcore.slack.com/channels/differentialabundance) channel ([join our Slack here](https://nf-co.re/join/slack)). -::: +> [!NOTE] +> If you need help using or modifying nf-core/differentialabundance then the best place to ask is on the nf-core Slack [#differentialabundance](https://nfcore.slack.com/channels/differentialabundance) channel ([join our Slack here](https://nf-co.re/join/slack)). ## Contribution workflow @@ -27,6 +26,12 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You have the option to test your changes locally by running the pipeline. For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: + +```bash +nf-test test --profile debug,test,docker --verbose +``` + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. @@ -87,7 +92,7 @@ Once there, use `nf-core schema build` to add to `nextflow_schema.json`. Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. 
A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. -The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block. +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. ### Naming schemes diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 1f6612e5..70d62a73 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -18,7 +18,8 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/diff - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/differentialabundance/tree/master/.github/CONTRIBUTING.md) - [ ] If necessary, also make a PR on the nf-core/differentialabundance _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). -- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). +- [ ] Ensure the test suite passes (`nf-test test main.nf.test -profile test,docker`). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. 
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 8d063f29..dd68bc7f 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -28,7 +28,7 @@ jobs: } profiles: test_full - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: Tower debug log file path: | diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 1bd534bd..ed9b65e7 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -25,7 +25,7 @@ jobs: } profiles: test - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: Tower debug log file path: | diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 3bf724ea..22e2a53a 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -19,7 +19,7 @@ jobs: # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment if: failure() - uses: mshick/add-pr-comment@v1 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 with: message: | ## This PR is against the `master` branch :x: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 03d356b4..876a22b1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,13 +34,16 @@ jobs: - "test_soft" steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 with: version: "${{ matrix.NXF_VER }}" + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + - name: Run pipeline with test data # You can customise CI pipeline run tests as required # For example: adding multiple test runs with different parameters diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml 
index 694e90ec..0b6b1f27 100644 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@v7 + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 with: stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 00000000..08622fd5 --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,72 @@ +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core download." 
+ required: true + default: "dev" + pull_request: + types: + - opened + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + with: + python-version: "3.11" + architecture: "x64" + - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 + with: + singularity-version: 3.8.3 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + run: | + nf-core download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Run the downloaded pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index d43af448..07013265 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -4,7 +4,7 @@ on: types: [created] jobs: - deploy: + 
fix-linting: # Only run if comment is on a PR with the main repo, and if it contains the magic keywords if: > contains(github.event.comment.html_url, '/pull/') && @@ -13,10 +13,17 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + # Action runs on the issue comment, so we don't get the PR by default # Use the gh cli to check out the PR - name: Checkout Pull Request @@ -24,32 +31,59 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v3 + # Install and run pre-commit + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + with: + python-version: "3.11" - - name: Install Prettier - run: npm install -g prettier @prettier/plugin-php + - name: Install pre-commit + run: pip install pre-commit - # Check that we actually need to fix something - - name: Run 'prettier --check' - id: prettier_status - run: | - if prettier --check ${GITHUB_WORKSPACE}; then - echo "result=pass" >> $GITHUB_OUTPUT - else - echo "result=fail" >> $GITHUB_OUTPUT - fi + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true - - name: Run 'prettier --write' - if: steps.prettier_status.outputs.result == 'fail' - run: prettier --write ${GITHUB_WORKSPACE} + # indication that the linting has finished + - name: react if linting finished successfully + if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" - name: Commit 
& push changes - if: steps.prettier_status.outputs.result == 'fail' + id: commit-and-push + if: steps.pre-commit.outcome == 'failure' run: | git config user.email "core@nf-co.re" git config user.name "nf-core-bot" git config push.default upstream git add . git status - git commit -m "[automated] Fix linting with Prettier" + git commit -m "[automated] Fix code linting" git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: failure() && steps.commit-and-push.outcome == 'failure' # failure() lifts the implicit success() gate so this runs after a failed push + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: react if linting errors were not fixed + if: failure() && steps.commit-and-push.outcome == 'failure' # same gate: post the explanatory comment only when the push step failed + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/nf-core/differentialabundance/actions/runs/${{ github.run_id }}) for more details. 
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index b8bdd214..073e1876 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,72 +11,33 @@ on: types: [published] jobs: - EditorConfig: + pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - - uses: actions/setup-node@v3 - - - name: Install editorconfig-checker - run: npm install -g editorconfig-checker - - - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - - Prettier: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - - - name: Install Prettier - run: npm install -g prettier - - - name: Run Prettier --check - run: prettier --check ${GITHUB_WORKSPACE} - - PythonBlack: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Check code lints with Black - uses: psf/black@stable - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 + - name: Set up Python 3.11 + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: - message: | - ## Python linting (`black`) is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` - * Fix formatting errors in your pipeline: `black .` - - Once you push these changes the test should pass, and you can hide this comment :+1: + python-version: 3.11 + cache: "pip" - We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! 
+ - name: Install pre-commit + run: pip install pre-commit - Thanks again for your contribution! - repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + - name: Run pre-commit + run: pre-commit run --all-files nf-core: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: python-version: "3.11" architecture: "x64" @@ -99,7 +60,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 0bbcd30f..b706875f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@v2 + uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@v2 + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcements.yml similarity index 80% rename from .github/workflows/release-announcments.yml rename to .github/workflows/release-announcements.yml index 6ad33927..d468aeaa 100644 --- a/.github/workflows/release-announcments.yml 
+++ b/.github/workflows/release-announcements.yml @@ -9,6 +9,11 @@ jobs: toot: runs-on: ubuntu-latest steps: + - name: get topics and convert to hashtags + id: get_topics # exposes the space-separated hashtags as outputs.topics + run: | + echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" >> $GITHUB_OUTPUT + - uses: rzr/fediverse-action@master with: access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} @@ -20,11 +25,13 @@ jobs: Please see the changelog: ${{ github.event.release.html_url }} + ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics + send-tweet: runs-on: ubuntu-latest steps: - - uses: actions/setup-python@v4 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: python-version: "3.10" - name: Install dependencies @@ -56,7 +63,7 @@ jobs: bsky-post: runs-on: ubuntu-latest steps: - - uses: zentered/bluesky-post-action@v0.0.2 + - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 with: post: | Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
diff --git a/.gitpod.yml b/.gitpod.yml index 25488dcc..105a1821 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -4,16 +4,17 @@ tasks: command: | pre-commit install --install-hooks nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files - # - cssho.vscode-svgviewer # SVG viewer - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - mechatroner.rainbow-csv # Highlight columns in csv files in different colors - # - nextflow.nextflow # Nextflow syntax highlighting + # - nextflow.nextflow # Nextflow syntax highlighting - oderwat.indent-rainbow # Highlight indentation level - streetsidesoftware.code-spell-checker # Spelling checker for source code + - charliermarsh.ruff # Code linter Ruff diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc81..d2cda970 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,8 @@ repository_type: pipeline +lint: + nextflow_config: + - config_defaults: + - params.logo_file + - params.css_file + - params.citations_file + - params.report_file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0c31cdb9..af57081f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,10 @@ repos: - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v2.7.1" + rev: "v3.1.0" hooks: - id: prettier + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "2.7.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/CHANGELOG.md b/CHANGELOG.md index e080218d..3d844a1e 100644 --- a/CHANGELOG.md 
+++ b/CHANGELOG.md @@ -3,11 +3,49 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v1.5.0 + +### `Added` + +- [[#266](https://github.com/nf-core/differentialabundance/pull/266)] - Fix logging by specifying assays to log ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO)) +- [[#259](https://github.com/nf-core/differentialabundance/pull/259)] - Bump gtf2featureannotation to fix GTF handling error ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO)) +- [[#257](https://github.com/nf-core/differentialabundance/pull/257)] - Added maxquant profile to nextflow.config to make it available ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) +- [[#254](https://github.com/nf-core/differentialabundance/pull/254)] - Some parameter changes, added qbic credits ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) +- [[#250](https://github.com/nf-core/differentialabundance/pull/250)] - Template update for nf-core/tools v2.13.1 ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) +- [[#244](https://github.com/nf-core/differentialabundance/pull/244)] - Add pipeline params for matrixfilter NA options ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) +- [[#241](https://github.com/nf-core/differentialabundance/pull/241)] - Template update for nf-core/tools v2.13 ([@WackerO](https://github.com/WackerO), review by [@nvnieuwk](https://github.com/nvnieuwk)) +- [[#228](https://github.com/nf-core/differentialabundance/pull/228)] - Template update for nf-core/tools v2.12 ([@nf-core-bot](https://github.com/nf-core-bot), review by [@pinin4fjords](https://github.com/pinin4fjords), 
[@WackerO](https://github.com/WackerO)) +- [[#222](https://github.com/nf-core/differentialabundance/pull/222)] - Add rounding to all numeric report tables ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) +- [[#219](https://github.com/nf-core/differentialabundance/pull/219)] - Template update for nf-core/tools v2.11.1 ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) +- [[#197](https://github.com/nf-core/differentialabundance/pull/197)] - Add contributor info to report ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) + +### `Fixed` + +- [[#267](https://github.com/nf-core/differentialabundance/pull/267)] - Whitespace fix, remove TODO, also update changelog for release 1.5.0 ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) +- [[#265](https://github.com/nf-core/differentialabundance/pull/265)] - GSEA- pngs and htmls in same place ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO)) +- [[#257](https://github.com/nf-core/differentialabundance/pull/257)] - Fixed FILTER_DIFFTABLE module, updated PROTEUS module to better handle whitespace in prefix param, made docs clearer ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) +- [[#254](https://github.com/nf-core/differentialabundance/pull/254)] - Made differential_file_suffix optional ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) +- [[#240](https://github.com/nf-core/differentialabundance/pull/240)] - Publish GSEA reports ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO)) +- [[#231](https://github.com/nf-core/differentialabundance/pull/231)] - Update GSEA module to fix butterfly plot bug 
([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) +- [[#226](https://github.com/nf-core/differentialabundance/pull/226)] - Fix DESEQ2_NORM in modules.config ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) +- [[#221](https://github.com/nf-core/differentialabundance/pull/221)] - Update shinyngs modules to address density plots issue ([@pinin4fjords](https://github.com/pinin4fjords), review by [@maxulysse](https://github.com/maxulysse)) +- [[#223](https://github.com/nf-core/differentialabundance/pull/223)] - tabulartogseacls fixes ([@pinin4fjords](https://github.com/pinin4fjords), review by [@maxulysse](https://github.com/maxulysse)) +- [[#213](https://github.com/nf-core/differentialabundance/pull/213)] - Fix volcano plot legend ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO)) +- [[#210](https://github.com/nf-core/differentialabundance/pull/210)] - Include Affy timeout fix ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO)) +- [[#208](https://github.com/nf-core/differentialabundance/pull/208)] - Fix resource issues and bump versions ([@pinin4fjords](https://github.com/pinin4fjords), review by [@sguizard](https://github.com/sguizard)) + +### `Changed` + +- [[#256](https://github.com/nf-core/differentialabundance/pull/256)] - Release 1.5.0 ([@WackerO](https://github.com/WackerO), review by [@maxulysse](https://github.com/maxulysse), [@pinin4fjords](https://github.com/pinin4fjords)) +- [[#264](https://github.com/nf-core/differentialabundance/pull/264)] - Change FILTER_DIFFTABLE to python because AWK does not filter reliably ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) +- [[#232](https://github.com/nf-core/differentialabundance/pull/232)] - Mention missing dots in volcano plot, change rounding, turn off 
rounding by default ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) + ## v1.4.0 - 2023-11-27 ### `Added` - [[#203](https://github.com/nf-core/differentialabundance/pull/203)] - Transcript lengths for DESeq2 ([@pinin4fjords](https://github.com/pinin4fjords), review by [@maxulysse](https://github.com/maxulysse)) +- [[#199](https://github.com/nf-core/differentialabundance/pull/199)] - Add gprofiler2 module and local differential table filtering module ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) - [[#193](https://github.com/nf-core/differentialabundance/pull/193)] - Add DESeq2 text to report ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) - [[#192](https://github.com/nf-core/differentialabundance/pull/192)] - Add scree plot in report ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) - [[#189](https://github.com/nf-core/differentialabundance/pull/189)] - Add DE models to report ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) @@ -115,7 +153,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v1.0.1 - 2023-01-25 -- [[#49](https://github.com/nf-core/differentialabundance/pull/49) - Add citation fixes, missing logos, output detail, and trigger Zenodo ([@pinin4fjords](https://github.com/pinin4fjords), review by [@apeltzer](https://github.com/apeltzer), [@jfy133](https://github.com/jfy133)) +- [[#49](https://github.com/nf-core/differentialabundance/pull/49)] - Add citation fixes, missing logos, output detail, and trigger Zenodo ([@pinin4fjords](https://github.com/pinin4fjords), review by [@apeltzer](https://github.com/apeltzer), [@jfy133](https://github.com/jfy133)) ## v1.0.0 - 2023-01-23 diff --git a/CITATIONS.md b/CITATIONS.md index 200368b1..d06c13bf 100644 --- 
a/CITATIONS.md +++ b/CITATIONS.md @@ -24,13 +24,17 @@ > Love MI, Huber W, Anders S (2014). Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biol. 15(12):550. PubMed PMID: 25516281; PubMed Central PMCID: PMC4302049. +- [GEOQuery](https://pubmed.ncbi.nlm.nih.gov/17496320/) + + > Davis S, Meltzer PS. Geoquery: a bridge between the gene expression omnibus (Geo) and bioconductor. Bioinformatics. 2007;23(14):1846-1847. + - [ggplot2](https://cran.r-project.org/web/packages/ggplot2/index.html) > H. Wickham (2016). ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. -- [GEOQuery](https://pubmed.ncbi.nlm.nih.gov/17496320/) +- [gprofiler2](https://cran.r-project.org/web/packages/gprofiler2/index.html) - > Davis S, Meltzer PS. Geoquery: a bridge between the gene expression omnibus (Geo) and bioconductor. Bioinformatics. 2007;23(14):1846-1847. + > Kolberg L, Raudvere U, Kuzmin I, Vilo J, Peterson H (2020). “gprofiler2– an R package for gene list functional enrichment analysis and namespace conversion toolset g:Profiler.” F1000Research, 9 (ELIXIR)(709). R package version 0.2.2. - [Limma](https://pubmed.ncbi.nlm.nih.gov/25605792/) diff --git a/README.md b/README.md index 16847299..adaee862 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,19 @@ -# ![nf-core/differentialabundance](docs/images/nf-core-differentialabundance_logo_light.png#gh-light-mode-only) ![nf-core/differentialabundance](docs/images/nf-core-differentialabundance_logo_dark.png#gh-dark-mode-only) +

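<h1>
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="docs/images/nf-core-differentialabundance_logo_dark.png">
+    <img alt="nf-core/differentialabundance" src="docs/images/nf-core-differentialabundance_logo_light.png">
+  </picture>
+</h1>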

-[![GitHub Actions CI Status](https://github.com/nf-core/differentialabundance/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/differentialabundance/actions?query=workflow%3A%22nf-core+CI%22) -[![GitHub Actions Linting Status](https://github.com/nf-core/differentialabundance/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/differentialabundance/actions?query=workflow%3A%22nf-core+linting%22)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/differentialabundance/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7568000-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7568000) +[![GitHub Actions CI Status](https://github.com/nf-core/differentialabundance/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/differentialabundance/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/nf-core/differentialabundance/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/differentialabundance/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/differentialabundance/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7568000-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7568000) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.10.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with 
singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/differentialabundance) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/differentialabundance) [![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23differentialabundance-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/differentialabundance)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) @@ -33,11 +39,8 @@ On release, automated continuous integration tests run the pipeline on a full-si ## Usage -:::note -If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how -to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) -with `-profile test` before running the workflow on actual data. -::: +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. 
RNA-seq: @@ -71,11 +74,9 @@ Affymetrix microarray: -profile affy, ``` -:::warning -Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those -provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; -see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -::: +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/differentialabundance/usage) and the [parameter documentation](https://nf-co.re/differentialabundance/parameters). @@ -109,7 +110,7 @@ For more details about the output files and reports, please refer to the ## Credits -nf-core/differentialabundance was originally written by Jonathan Manning ([@pinin4fjords](https://github.com/pinin4fjords)) and Oskar Wacker ([@WackerO](https://github.com/WackerO)). Jonathan Manning (now at Seqera) initially worked on this workflow as an employee of Healx, an AI-powered, patient-inspired tech company, accelerating the discovery and development of treatments for rare diseases. We are grateful for their support of open science in this project. +nf-core/differentialabundance was originally written by Jonathan Manning ([@pinin4fjords](https://github.com/pinin4fjords)) and Oskar Wacker ([@WackerO](https://github.com/WackerO)). Jonathan Manning (now at Seqera) initially worked on this workflow as an employee of Healx, an AI-powered, patient-inspired tech company, accelerating the discovery and development of treatments for rare diseases. Oskar Wacker works for [QBiC](https://www.qbic.uni-tuebingen.de/) at Tübingen University. 
We are grateful for the support of open science in this project. We thank the many members of the nf-core community who assisted with this pipeline, often by reviewing module pull requests including but not limited to: diff --git a/assets/differentialabundance_report.Rmd b/assets/differentialabundance_report.Rmd index 828ebe03..7bf41c12 100644 --- a/assets/differentialabundance_report.Rmd +++ b/assets/differentialabundance_report.Rmd @@ -1,142 +1,157 @@ --- output: - html_document: - toc: true # table of contents - toc_float: true # float the table of contents to the left of the main document content - toc_depth: 4 # header levels 1,2,3 - theme: default - number_sections: false # add section numbering to headers - df_print: paged # tables are printed as an html table with support for pagination over rows and columns - highlight: pygments - pdf_document: true - pdf_document: - toc: yes + html_document: + toc: true # table of contents + toc_float: true # float the table of contents to the left of the main document content + toc_depth: 4 # header levels 1,2,3 + theme: default + number_sections: false # add section numbering to headers + df_print: paged # tables are printed as an html table with support for pagination over rows and columns + highlight: pygments + pdf_document: true + pdf_document: + toc: yes date: "`r Sys.Date()`" params: - meta: NULL - input_dir: NULL - artifact_dir: NULL - cpus: 1 - study_type: NULL - study_name: NULL - study_abundance_type: NULL - report_file: NULL, - report_title: NULL, - report_author: NULL, - report_description: NULL, - report_scree: NULL - observations_type: NULL - observations: NULL # GSE156533.samplesheet.csv - observations_id_col: NULL - observations_name_col: NULL - features: NULL - features_type: NULL - features_id_col: NULL - features_name_col: NULL - features_metadata_cols: NULL - features_gtf_feature_type: NULL - features_gtf_table_first_field: NULL - features_log2_assays: NULL - raw_matrix: null # e.g. 
0_salmon.merged.gene_counts_length_scaled.tsv - normalised_matrix: null - variance_stabilised_matrix: null # e.g. test_files/3_treatment-WT-P23H.vst.tsv - contrasts_file: null # e.g. GSE156533.contrasts.csv - differential_table: file.csv - proteus_measurecol_prefix: NULL - proteus_norm_function: NULL - proteus_plotsd_method: NULL - proteus_plotmv_loess: NULL - proteus_palette_name: NULL - proteus_round_digits: NULL - affy_cel_files_archive: NULL - affy_file_name_col: NULL - affy_background: NULL - affy_bgversion: NULL - affy_destructive: NULL - affy_cdfname: NULL - affy_rm_mask: NULL - affy_rm_outliers: NULL - affy_rm_extra: NULL - affy_build_annotation: NULL - limma_ndups: NULL - limma_spacing: NULL - limma_block: NULL - limma_correlation: NULL - limma_method: NULL - limma_proportion: NULL - limma_stdev_coef_lim: NULL - limma_trend: NULL - limma_robust: NULL - limma_winsor_tail_p: NULL - limma_adjust_method: NULL - limma_p_value: NULL - limma_lfc: NULL - limma_confint: NULL - exploratory_n_features: null - exploratory_clustering_method: null - exploratory_cor_method: null - exploratory_whisker_distance: null - exploratory_mad_threshold: null - exploratory_main_variable: null - exploratory_assay_names: NULL - exploratory_final_assay: NULL - exploratory_palette_name: NULL - versions_file: null # e.g 17_software_versions.yml - logo: null - css: null - citations: null - filtering_min_samples: 1 - filtering_min_abundance: 1 - filtering_min_proportion: NULL - filtering_grouping_var: NULL - differential_file_suffix: NULL - differential_feature_id_column: NULL - differential_feature_name_column: NULL - differential_fc_column: NULL - differential_pval_column: NULL - differential_qval_column: NULL - differential_min_fold_change: NULL - differential_foldchanges_logged: NULL - differential_max_pval: NULL - differential_max_qval: NULL - differential_palette_name: NULL - differential_subset_to_contrast_samples: NULL - deseq2_test: NULL - deseq2_fit_type: NULL - deseq2_sf_type: 
NULL - deseq2_min_replicates_for_replace: NULL - deseq2_use_t: NULL - deseq2_lfc_threshold: NULL - deseq2_alt_hypothesis: NULL - deseq2_independent_filtering: NULL - deseq2_p_adjust_method: NULL - deseq2_alpha: NULL - deseq2_minmu: NULL - deseq2_vs_method: NULL - deseq2_shrink_lfc: NULL - deseq2_cores: NULL - deseq2_vs_blind: NULL - deseq2_vst_nsub: NULL - gsea_run: false - gsea_nperm: NULL - gsea_permute: NULL - gsea_scoring_scheme: NULL - gsea_metric: NULL - gsea_sort: NULL - gsea_order: NULL - gsea_set_max: NULL - gsea_set_min: NULL - gsea_norm: NULL - gsea_rnd_type: NULL - gsea_make_sets: NULL - gsea_median: NULL - gsea_num: NULL - gsea_plot_top_x: NULL - gsea_rnd_seed: NULL - gsea_save_rnd_lists: NULL - gsea_zip_report: NULL - gsea_chip_file: NULL - gsea_gene_sets: NULL + meta: NULL + input_dir: NULL + artifact_dir: NULL + cpus: 1 + study_type: NULL + study_name: NULL + study_abundance_type: NULL + report_file: NULL + report_title: NULL # NULL: no ': <title>' suffix is appended to the report title + report_contributors: NULL + report_author: NULL # NULL: no 'By <author>, ' prefix is added to the subtitle + report_description: NULL + report_scree: NULL + report_round_digits: NULL + observations_type: NULL + observations: NULL # GSE156533.samplesheet.csv + observations_id_col: NULL + observations_name_col: NULL + features: NULL + features_type: NULL + features_id_col: NULL + features_name_col: NULL + features_metadata_cols: NULL + features_gtf_feature_type: NULL + features_gtf_table_first_field: NULL + exploratory_log2_assays: NULL + raw_matrix: null # e.g. 0_salmon.merged.gene_counts_length_scaled.tsv + normalised_matrix: null + variance_stabilised_matrix: null # e.g. test_files/3_treatment-WT-P23H.vst.tsv + contrasts_file: null # e.g.
GSE156533.contrasts.csv + differential_table: file.csv + proteus_measurecol_prefix: NULL + proteus_norm_function: NULL + proteus_plotsd_method: NULL + proteus_plotmv_loess: NULL + proteus_palette_name: NULL + affy_cel_files_archive: NULL + affy_file_name_col: NULL + affy_background: NULL + affy_bgversion: NULL + affy_destructive: NULL + affy_cdfname: NULL + affy_rm_mask: NULL + affy_rm_outliers: NULL + affy_rm_extra: NULL + affy_build_annotation: NULL + limma_ndups: NULL + limma_spacing: NULL + limma_block: NULL + limma_correlation: NULL + limma_method: NULL + limma_proportion: NULL + limma_stdev_coef_lim: NULL + limma_trend: NULL + limma_robust: NULL + limma_winsor_tail_p: NULL + limma_adjust_method: NULL + limma_p_value: NULL + limma_lfc: NULL + limma_confint: NULL + exploratory_n_features: null + exploratory_clustering_method: null + exploratory_cor_method: null + exploratory_whisker_distance: null + exploratory_mad_threshold: null + exploratory_main_variable: null + exploratory_assay_names: NULL + exploratory_final_assay: NULL + exploratory_palette_name: NULL + versions_file: null # e.g 17_software_versions.yml + logo: null + css: null + citations: null + filtering_min_samples: 1 + filtering_min_abundance: 1 + filtering_min_proportion: NULL + filtering_grouping_var: NULL + differential_file_suffix: NULL + differential_feature_id_column: NULL + differential_feature_name_column: NULL + differential_fc_column: NULL + differential_pval_column: NULL + differential_qval_column: NULL + differential_min_fold_change: NULL + differential_foldchanges_logged: NULL + differential_max_pval: NULL + differential_max_qval: NULL + differential_palette_name: NULL + differential_subset_to_contrast_samples: NULL + deseq2_test: NULL + deseq2_fit_type: NULL + deseq2_sf_type: NULL + deseq2_min_replicates_for_replace: NULL + deseq2_use_t: NULL + deseq2_lfc_threshold: NULL + deseq2_alt_hypothesis: NULL + deseq2_independent_filtering: NULL + deseq2_p_adjust_method: NULL + deseq2_alpha: 
NULL + deseq2_minmu: NULL + deseq2_vs_method: NULL + deseq2_shrink_lfc: NULL + deseq2_cores: NULL + deseq2_vs_blind: NULL + deseq2_vst_nsub: NULL + gsea_run: false + gsea_nperm: NULL + gsea_permute: NULL + gsea_scoring_scheme: NULL + gsea_metric: NULL + gsea_sort: NULL + gsea_order: NULL + gsea_set_max: NULL + gsea_set_min: NULL + gsea_norm: NULL + gsea_rnd_type: NULL + gsea_make_sets: NULL + gsea_median: NULL + gsea_num: NULL + gsea_plot_top_x: NULL + gsea_rnd_seed: NULL + gsea_save_rnd_lists: NULL + gsea_zip_report: NULL + gsea_chip_file: NULL + gprofiler2_run: false + gprofiler2_organism: NULL + gprofiler2_significant: NULL + gprofiler2_measure_underrepresentation: NULL + gprofiler2_correction_method: NULL + gprofiler2_sources: NULL + gprofiler2_evcodes: NULL + gprofiler2_max_qval: NULL + gprofiler2_token: NULL + gprofiler2_background_file: NULL + gprofiler2_background_column: NULL + gprofiler2_domain_scope: NULL + gprofiler2_min_diff: NULL + gprofiler2_palette_name: NULL + gene_sets_files: NULL --- @@ -149,6 +164,29 @@ library(plotly) library(DT) ``` + + +```{r, include=FALSE} +round_dataframe_columns <- function(df, columns = NULL, digits = -1) { # format `columns` of df (default: all numeric columns) to `digits` significant digits; returns a data.frame + if (is.null(digits) || digits == -1) { + return(df) # NULL or -1 means "no rounding": return df untouched + } + + df <- data.frame(df, check.names = FALSE) # make data.frame from vector as otherwise, the format will get messed up + if (is.null(columns)) { + columns <- colnames(df)[(unlist(lapply(df, is.numeric), use.names=FALSE))] # extract only numeric columns for rounding + } + df[,columns] <- format(data.frame(df[, columns], check.names = FALSE), scientific=TRUE, digits=digits) # use the digits argument, not the global report parameter + # Convert columns back to numeric (format() returns strings; "NA" strings become real NA first) + + for (c in columns) { + df[[c]][grep("^ *NA$", df[[c]])] <- NA + df[[c]] <- as.numeric(df[[c]]) + } + df +} +``` + ```{r include = FALSE} # Load the datatables js datatable(NULL) @@ -160,31 +198,32 @@ params_table <- data.frame(Parameter = names(unlist(params)), Value = unlist(par # We'll subset the params table for
different report sections make_params_table <- function(name, pattern = NULL, remove_pattern = FALSE){ - subparams <- params_table - if (! is.null(pattern)){ - subparams <- subparams[grep(pattern, subparams$Parameter),] - } - if (remove_pattern){ - subparams$Parameter <- sub(pattern, '', subparams$Parameter) - } - - if (nrow(subparams) > 10){ - dom <- 'tp' - }else{ - dom <- 't' - } - - print( htmltools::tagList(datatable(subparams, caption = paste("Parameters used for", name), rownames = FALSE, options = list(dom = dom)) )) + subparams <- params_table + if (! is.null(pattern)){ + subparams <- subparams[grep(pattern, subparams$Parameter),] + } + if (remove_pattern){ + subparams$Parameter <- sub(pattern, '', subparams$Parameter) + } + + if (nrow(subparams) > 10){ + dom <- 'tp' + }else{ + dom <- 't' + } + + print( htmltools::tagList(datatable(subparams, caption = paste("Parameters used for", name), rownames = FALSE, options = list(dom = dom)) )) } report_title <- paste0('Differential ', params$features_type, ' abundance report', ifelse(is.null(params$report_title), '', paste0(': ', params$report_title))) -report_subtitle <- paste0(ifelse(is.null(params$report_author), '', paste0('By ', params$report_author, ', ')), 'differentialabundance workflow version', versions[["Workflow.nf-core/differentialabundance"]]) +report_subtitle <- paste0(ifelse(is.null(params$report_author), '', paste0('By ', params$report_author, ', ')), '
differentialabundance workflow version', versions[["Workflow.nf-core/differentialabundance"]]) ``` --- title: "`r report_title`" subtitle: `r report_subtitle` --- +\ @@ -202,31 +241,48 @@ htmltools::includeCSS(params$css) cat(paste0(" ")) ``` + + +```{r, results='asis', echo=F, eval=!is.null(params$report_contributors)} +contributors <- gsub("\n", "
", params$report_contributors, fixed = TRUE) +contributors <- lapply(simpleSplit(contributors, ";"), function(s) { + splt <- simpleSplit(s, "
") + paste0("**", head(splt, 1), "**
", paste(tail(splt, -1), collapse = "
")) +}) + +for (r in seq_along(contributors)) { + if (r %% 2 == 1) cat("
") + cat(paste0("
", contributors[r], "
")) + if (r %% 2 == 0 || r == length(contributors)) cat("
") +} +``` + ```{r, echo=FALSE} observations <- read_metadata(file.path(params$input_dir, params$observations), id_col = params$observations_id_col) -if (! params$observations_name_col %in% colnames(observations)){ - stop(paste('Invalid observation name column specified: ', params$observations_name_col, paste0('(Valid values are: ', paste(colnames(observations), collapse=', '),')'))) +observations_name_col <- ifelse(!is.null(params$observations_name_col), params$observations_name_col, params$observations_id_col) +if (! observations_name_col %in% colnames(observations)){ + stop(paste('Invalid observation name column specified: ', observations_name_col, paste0('(Valid values are: ', paste(colnames(observations), collapse=', '),')'))) } if (! is.null(params$features)){ - features <- read_metadata(file.path(params$input_dir, params$features)) - if (! is.null(params$features_metadata_cols)){ - features <- features[,colnames(features) %in% simpleSplit(params$features_metadata_cols), drop = FALSE] - } + features <- read_metadata(file.path(params$input_dir, params$features)) + if (! is.null(params$features_metadata_cols)){ + features <- features[,colnames(features) %in% simpleSplit(params$features_metadata_cols), drop = FALSE] + } } contrasts <- read_metadata(file.path(params$input_dir, params$contrasts_file)) contrasts$blocking <- na.replace(contrasts$blocking, '') if (! 
'id' %in% colnames(contrasts)){ - contrasts$id <- apply(contrasts, 1, paste, collapse='_') + contrasts$id <- apply(contrasts, 1, paste, collapse='_') } # Identify informative variables- those with a number of values greater than 1 @@ -243,29 +299,30 @@ names(assay_names) = assay_names assay_files <- lapply(assay_names, function(x) params[[paste0(x, '_matrix')]]) assay_data <- lapply(assay_files, function(x) { - mat <- na.omit( - read_matrix( - x, - sample_metadata = observations, - row.names = 1 + mat <- na.omit( + read_matrix( + x, + sample_metadata = observations, + row.names = 1 + ) ) - ) - colnames(mat) <- observations[[params$observations_name_col]][match(colnames(mat), rownames(observations))] - mat + colnames(mat) <- observations[[observations_name_col]][match(colnames(mat), rownames(observations))] + mat }) -if (!is.null(params$features_log2_assays)) { - # Remove brackets from assay list. TODO: remove if this is added to cond_log2_transform_assays - params$features_log2_assays <- gsub('\\]$', '', gsub('^\\[', '', params$features_log2_assays)) +log2_assays <- params$exploratory_log2_assays +if (!is.null(log2_assays)) { + # Remove brackets from assay list. 
TODO: remove if this is added to cond_log2_transform_assays + log2_assays <- gsub('\\]$', '', gsub('^\\[', '', log2_assays)) } -assay_data <- cond_log2_transform_assays(assay_data, params$features_log2_assays) +assay_data <- cond_log2_transform_assays(assay_data, log2_assays, prettify_names = FALSE) # Now we can rename the observations rows using the title field -rownames(observations) <- observations[[params$observations_name_col]] +rownames(observations) <- observations[[observations_name_col]] # Run PCA early so we can understand how important each variable is pca_datas <- lapply(names(assay_data), function(assay_type){ - compilePCAData(assay_data[[assay_type]]) + compilePCAData(assay_data[[assay_type]]) }) names(pca_datas) <- names(assay_data) @@ -276,14 +333,14 @@ informative_variables <- rownames(pca_vs_meta)[order(pca_vs_meta[,1])] # Pick the variable used for coloring purposes etc if (params$exploratory_main_variable == 'contrasts'){ - main_grouping_variable <- contrasts$variable[1] + main_grouping_variable <- contrasts$variable[1] }else if (params$exploratory_main_variable == 'auto_pca'){ - main_grouping_variable <- informative_variables[1] + main_grouping_variable <- informative_variables[1] }else{ - if (! params$exploratory_main_variable %in% colnames(observations)){ - stop(paste('Invalid main variable specified: ', params$exploratory_main_variable)) - } - main_grouping_variable <- params$exploratory_main_variable + if (! 
params$exploratory_main_variable %in% colnames(observations)){ + stop(paste('Invalid main variable specified: ', params$exploratory_main_variable)) + } + main_grouping_variable <- params$exploratory_main_variable } # Make sure the main variable is shown first, with remaining shown in order of @@ -305,35 +362,38 @@ treatment-mCherry-hND6-batcheffect.deseq2.results.tsv --> ```{r, echo=FALSE} - +differential_file_suffix <- params$differential_file_suffix +if (is.null(differential_file_suffix)) { + differential_file_suffix <- ifelse(params$study_type %in% c('rnaseq'), ".deseq2.results.tsv", ".limma.results.tsv") +} differential_files <- lapply(contrasts$id, function(d){ - file.path(params$input_dir, paste0(gsub(' |;', '_', d), params$differential_file_suffix)) + file.path(params$input_dir, paste0(gsub(' |;', '_', d), differential_file_suffix)) }) differential_results <- lapply(differential_files, function(diff_file){ - if (! file.exists(diff_file)){ - stop(paste("Differential file", diff_file, "does not exist")) - } - diff <- read_differential( - diff_file, - feature_id_column = params$differential_feature_id_column, - fc_column = params$differential_fc_column, - pval_column = params$differential_pval_column, - qval_column = params$differential_qval_column - ) - - # If fold changes are not logged already, log them (we assume they're logged - # later on) - - if (! params$differential_foldchanges_logged){ - diff[[params$differential_fc_column]] <- log2(diff[[params$differential_fc_column]]) - } - - # Annotate differential tables if possible - if (! is.null(params$features)){ - diff <- merge(features, diff, by.x = params$features_id_col, by.y = params$differential_feature_id_column) - } - diff + if (! 
file.exists(diff_file)){ + stop(paste("Differential file", diff_file, "does not exist")) + } + diff <- read_differential( + diff_file, + feature_id_column = params$differential_feature_id_column, + fc_column = params$differential_fc_column, + pval_column = params$differential_pval_column, + qval_column = params$differential_qval_column + ) + + # If fold changes are not logged already, log them (we assume they're logged + # later on) + + if (! params$differential_foldchanges_logged){ + diff[[params$differential_fc_column]] <- log2(diff[[params$differential_fc_column]]) + } + + # Annotate differential tables if possible + if (! is.null(params$features)){ + diff <- merge(features, diff, by.x = params$features_id_col, by.y = params$differential_feature_id_column) + } + diff }) names(differential_results) <- contrasts$id ``` @@ -345,18 +405,18 @@ names(differential_results) <- contrasts$id # Function to make friendly contrast name from contrast components, including optional bits name_contrast <- function(i){ - contrast_name <- paste(contrasts$target[i], 'versus', contrasts$reference[i], 'in', contrasts$variable[i]) - contrast_vals <- contrasts[i,] - populated <- colnames(contrasts)[! (is.na(contrast_vals) | contrast_vals == '' | is.null(contrast_vals))] - optional <- setdiff(populated, c('id', 'target', 'reference', 'variable')) - - if (length(optional) > 0){ - optional_part <- paste0('(', paste(paste(optional, contrasts[i,optional], sep=': '), collapse=', '), ')') - }else{ - optional_part <- '' - } - - paste(contrast_name, optional_part) + contrast_name <- paste(contrasts$target[i], 'versus', contrasts$reference[i], 'in', contrasts$variable[i]) + contrast_vals <- contrasts[i,] + populated <- colnames(contrasts)[! 
(is.na(contrast_vals) | contrast_vals == '' | is.null(contrast_vals))] + optional <- setdiff(populated, c('id', 'target', 'reference', 'variable')) + + if (length(optional) > 0){ + optional_part <- paste0('(', paste(paste(optional, contrasts[i,optional], sep=': '), collapse=', '), ')') + }else{ + optional_part <- '' + } + + paste(contrast_name, optional_part) } contrast_descriptions <- unlist(lapply(1:nrow(contrasts), function(x) name_contrast(x))) @@ -366,27 +426,27 @@ contrast_descriptions <- unlist(lapply(1:nrow(contrasts), function(x) name_contr p_value_types <- list(Adjusted = params$differential_qval_column, Unadjusted = params$differential_pval_column) p_value_thresholds <- list(Adjusted = params$differential_max_qval, Unadjusted = params$differential_max_pval) -sig_differential <- - lapply(names(p_value_types), function(pvt){ - diff <- lapply( - 1:nrow(contrasts), - function(x){ - signif <- differential_results[[x]][,p_value_types[[pvt]] ] < p_value_thresholds[[pvt]] - list( - up = differential_results[[x]][which( - differential_results[[x]][,params$differential_fc_column ] > log2(params$differential_min_fold_change) & - signif - ),], - down = differential_results[[x]][which( - differential_results[[x]][,params$differential_fc_column ] < log2(1/params$differential_min_fold_change) & - signif - ),] - ) - } - ) - names(diff) <- contrast_descriptions - diff - }) +sig_differential <- + lapply(names(p_value_types), function(pvt){ + diff <- lapply( + 1:nrow(contrasts), + function(x){ + signif <- differential_results[[x]][,p_value_types[[pvt]] ] < p_value_thresholds[[pvt]] + list( + up = differential_results[[x]][which( + differential_results[[x]][,params$differential_fc_column ] > log2(params$differential_min_fold_change) & + signif + ),], + down = differential_results[[x]][which( + differential_results[[x]][,params$differential_fc_column ] < log2(1/params$differential_min_fold_change) & + signif + ),] + ) + } + ) + names(diff) <- contrast_descriptions + diff + 
}) names(sig_differential) <- names(p_value_types) # Count the differential genes @@ -398,7 +458,7 @@ names(differential_tables) <- names(sig_differential) # Abstract -This report summarises differential `r params$features_type` analysis as performed by the nf-core/differentialabundance pipeline. +This report summarises differential `r params$features_type` analysis as performed by the nf-core/differentialabundance pipeline. # Data @@ -416,9 +476,9 @@ minimal_fetchngs_cols <- c('sample', 'sample_title', 'strandedness', 'library_st # If the data came via fetchngs then we can infer a couple of things about the most useful columns if (all(minimal_fetchngs_cols %in% colnames(observations))){ - additional_useful_cols <- minimal_fetchngs_cols + additional_useful_cols <- minimal_fetchngs_cols }else{ - additional_useful_cols <- colnames(observations)[which(apply(observations, 2, function(x) max(nchar(x))) <= 20)] + additional_useful_cols <- colnames(observations)[which(apply(observations, 2, function(x) max(nchar(x))) <= 20)] } display_columns <- head(union(display_columns, additional_useful_cols), 5) @@ -428,7 +488,6 @@ display_columns <- unique(c(display_columns, informative_variables)) observations_to_print <- observations[,unique(display_columns)] colnames(observations_to_print) <- prettifyVariablename(colnames(observations_to_print)) print( htmltools::tagList(datatable(observations_to_print, caption = paste(ucfirst(params$observations_type), 'metadata'), rownames = FALSE, options = list(dom = 'tb')) )) - ``` ## Contrasts @@ -442,13 +501,13 @@ colnames(contrasts_to_print) <- prettifyVariablename(colnames(contrasts_to_print # Add design/model formulae to report de_tool <- ifelse(params$study_type %in% c('rnaseq'), "deseq2", "limma") contrasts_to_print$model <- sapply(contrasts_to_print$Id, function(id) { - model_file <- paste0(id, ".", de_tool, ".model.txt") - if (file.exists(model_file)) { - first_line <- readLines(model_file, n = 1) - return(first_line) - } else { - 
return(NA) - } + model_file <- paste0(id, ".", de_tool, ".model.txt") + if (file.exists(model_file)) { + first_line <- readLines(model_file, n = 1) + return(first_line) + } else { + return(NA) + } }) print( htmltools::tagList(datatable(contrasts_to_print, caption = paste0("Table of contrasts"), rownames = FALSE, options = list(dom = 't')) )) @@ -466,22 +525,18 @@ Input was a matrix of `r nrow(assay_data$raw)` `r params$features_type`s for `r The following plots show the abundance value distributions of input matrices. A log2 transformation is applied where not already performed. -```{r, include=FALSE} - -``` - #### Box plots ```{r, echo=FALSE, results='asis', fig.height=8} p <- ggplot_boxplot( - assay_data, - experiment = observations, - colorby = main_grouping_variable, - expressiontype = paste("count per", params$features_type), - palette = groupColorScale, - whisker_distance = params$exploratory_whisker_distance, - base_size=8 + assay_data, + experiment = observations, + colorby = main_grouping_variable, + expressiontype = paste("count per", params$features_type), + palette = groupColorScale, + whisker_distance = params$exploratory_whisker_distance, + base_size=8 ) print(p) ``` @@ -492,11 +547,11 @@ Whiskers in the above boxplots show `r params$exploratory_whisker_distance` time ```{r, echo=FALSE, results='asis', fig.height=8} plotly_densityplot( - assay_data, - experiment = observations, - colorby = params$observations_name_col, - expressiontype = paste("count per", params$features_type), - makeColorScale(length(unique(observations[[params$observations_id_col]])), palette = "Set1") + assay_data, + experiment = observations, + colorby = observations_name_col, + expressiontype = paste("count per", params$features_type), + makeColorScale(length(unique(observations[[params$observations_id_col]])), palette = "Set1") ) ``` @@ -506,62 +561,62 @@ cat(paste0("\n### ", ucfirst(params$observations_type), " relationships\n")) #### Principal components plots {.tabset} 
-Principal components analysis was conducted based on the `r params$exploratory_n_features` most variable `r params$features_type`s. Each component was annotated with its percent contribution to variance. +Principal components analysis was conducted based on the `r params$exploratory_n_features` most variable `r params$features_type`s. Each component was annotated with its percent contribution to variance. ```{r, echo=FALSE, results='asis'} # Create nested list to save the percentVars for reusing in the scree plot percentVar_list <- list() for (assay_type in rev(names(assay_data))){ - - pca_data <- pca_datas[[assay_type]] - for (iv in informative_variables){ - - cat(paste0("\n##### ", prettifyVariablename(assay_type), " (", iv, ")\n")) - - plotdata <- pca_data$coords - plotdata$colorby <- factor( - observations[[iv]], - levels = unique(observations[[iv]]) - ) - pcaColorScale <- makeColorScale(length(unique(observations[[iv]])), palette = params$exploratory_palette_name) - - # Make plotting data combining PCA coords with coloring groups etc - - plotdata$name <- rownames(plotdata) - percentVar <- pca_data$percentVar - labels <- paste0(colnames(plotdata), " (", sprintf("%.1f", percentVar), "%)") - ncats <- length(unique(plotdata$colorby)) - - plot_types <- list("2" = "scatter", "3" = "scatter3d") - - for (d in names(plot_types)) { - - # Default plot args whatever we're doing - - plot_args <- list( - x = pca_data$coords[, 1], - y = pca_data$coords[, 2], - xlab = labels[1], - ylab = labels[2], - colorby = plotdata$colorby, - plot_type = plot_types[[d]], - palette = pcaColorScale, - legend_title = prettifyVariablename(iv), - labels = plotdata$name, - show_labels = TRUE - ) - - if (d == "3") { - plot_args$z <- pca_data$coords[, 3] - plot_args$zlab <- labels[3] - } - - print(htmltools::tagList(do.call("plotly_scatterplot", plot_args))) - } - if (! 
assay_type %in% names(percentVar_list)){ - percentVar_list[[assay_type]] <- percentVar + + pca_data <- pca_datas[[assay_type]] + for (iv in informative_variables){ + + cat(paste0("\n##### ", prettifyVariablename(assay_type), " (", iv, ")\n")) + + plotdata <- pca_data$coords + plotdata$colorby <- factor( + observations[[iv]], + levels = unique(observations[[iv]]) + ) + pcaColorScale <- makeColorScale(length(unique(observations[[iv]])), palette = params$exploratory_palette_name) + + # Make plotting data combining PCA coords with coloring groups etc + + plotdata$name <- rownames(plotdata) + percentVar <- pca_data$percentVar + labels <- paste0(colnames(plotdata), " (", sprintf("%.1f", percentVar), "%)") + ncats <- length(unique(plotdata$colorby)) + + plot_types <- list("2" = "scatter", "3" = "scatter3d") + + for (d in names(plot_types)) { + + # Default plot args whatever we're doing + + plot_args <- list( + x = pca_data$coords[, 1], + y = pca_data$coords[, 2], + xlab = labels[1], + ylab = labels[2], + colorby = plotdata$colorby, + plot_type = plot_types[[d]], + palette = pcaColorScale, + legend_title = prettifyVariablename(iv), + labels = plotdata$name, + show_labels = TRUE + ) + + if (d == "3") { + plot_args$z <- pca_data$coords[, 3] + plot_args$zlab <- labels[3] + } + + print(htmltools::tagList(do.call("plotly_scatterplot", plot_args))) + } + if (! 
assay_type %in% names(percentVar_list)){ + percentVar_list[[assay_type]] <- percentVar + } } - } } ``` @@ -571,19 +626,19 @@ cat(paste0("\nThe following scree plot visualizes what percentage of total varia #iv <- informative_variables[1] for (assay_type in names(percentVar_list)) { - percentVarData <- data.frame(percentVar_list[[assay_type]]) - colnames(percentVarData) <- c("var_explained") - percentVarData$PCA <- as.numeric(rownames(percentVarData)) - cat(paste0("\n##### ", prettifyVariablename(assay_type), "\n")) - print( - ggplot(percentVarData, aes(x=factor(PCA),y=var_explained, group=1)) + - theme_bw() + - geom_point(size=4) + - geom_line(linetype="dashed") + - xlab("PC") + - ylab("Percent variance explained") - ) - cat("\n") + percentVarData <- data.frame(percentVar_list[[assay_type]]) + colnames(percentVarData) <- c("var_explained") + percentVarData$PCA <- as.numeric(rownames(percentVarData)) + cat(paste0("\n##### ", prettifyVariablename(assay_type), "\n")) + print( + ggplot(percentVarData, aes(x=factor(PCA),y=var_explained, group=1)) + + theme_bw() + + geom_point(size=4) + + geom_line(linetype="dashed") + + xlab("PC") + + ylab("Percent variance explained") + ) + cat("\n") } ``` @@ -598,68 +653,68 @@ The resulting p values are illustrated below. # This is a little hack to work around a bug in d3heatmap with single-row data # frames. 
if (nrow(pca_vs_meta) == 1){ - plot_pca_meta <- rbind(pca_vs_meta, pca_vs_meta) + plot_pca_meta <- rbind(pca_vs_meta, pca_vs_meta) }else{ - plot_pca_meta <- pca_vs_meta + plot_pca_meta <- pca_vs_meta } d3heatmap::d3heatmap( - -log10(plot_pca_meta), - Rowv = FALSE, - dendrogram = 'none', - cellnote = plot_pca_meta, - cexCol = 0.8, - cexRow = 0.8, - height = (100 + (15 * nrow(plot_pca_meta))), - colors = colorRampPalette( - rev( - RColorBrewer::brewer.pal(n = 7, name = "RdYlBu") - ) - )(100) + -log10(plot_pca_meta), + Rowv = FALSE, + dendrogram = 'none', + cellnote = plot_pca_meta, + cexCol = 0.8, + cexRow = 0.8, + height = (100 + (15 * nrow(plot_pca_meta))), + colors = colorRampPalette( + rev( + RColorBrewer::brewer.pal(n = 7, name = "RdYlBu") + ) + )(100) ) for (variable in rownames(pca_vs_meta)){ - sig_comps <- pca_vs_meta[variable,] < 0.1 - - if (any(sig_comps)){ - min_sig_comp <- min(which(sig_comps)) - - min_sig_comp_p <- sprintf("%.2f", pca_vs_meta[variable, min_sig_comp]) - cat(paste0('The variable \'', variable, '\' shows an association with ', colnames(pca_vs_meta)[min_sig_comp], ' (p = ', min_sig_comp_p,'). ')) - } + sig_comps <- pca_vs_meta[variable,] < 0.1 + + if (any(sig_comps)){ + min_sig_comp <- min(which(sig_comps)) + + min_sig_comp_p <- sprintf("%.2f", pca_vs_meta[variable, min_sig_comp]) + cat(paste0('The variable \'', variable, '\' shows an association with ', colnames(pca_vs_meta)[min_sig_comp], ' (p = ', min_sig_comp_p,'). ')) + } } ``` #### Clustering dendrograms {.tabset} -A hierarchical clustering of `r params$features_type`s was undertaken based on `r ifelse(params$exploratory_n_features == -1, paste0("all ", params$features_type), paste0("the ", params$exploratory_n_features, " most variable ", params$features_type))`s. 
Distances between `r params$features_type`s were estimated based on `r params$exploratory_cor_method` correlation, which were then used to produce a clustering via the `r params$exploratory_clustering_method` method with `hclust()` in R. +A hierarchical clustering of `r params$features_type`s was undertaken based on `r ifelse(params$exploratory_n_features == -1, paste0("all ", params$features_type), paste0("the ", params$exploratory_n_features, " most variable ", params$features_type))`s. Distances between `r params$features_type`s were estimated based on `r params$exploratory_cor_method` correlation, which were then used to produce a clustering via the `r params$exploratory_clustering_method` method with `hclust()` in R. ```{r, echo=FALSE, results='asis'} for (assay_type in rev(names(assay_data))){ - for (iv in informative_variables){ - cat(paste0("\n##### ", prettifyVariablename(assay_type), " (", iv, ")\n")) - variable_genes <- selectVariableGenes(matrix = assay_data[[assay_type]], ntop = ifelse(params$exploratory_n_features == -1, nrow(assay_data[[assay_type]]), params$exploratory_n_features)) - - dendroColorScale <- makeColorScale(length(unique(observations[[iv]])), palette = params$exploratory_palette_name) - p <- clusteringDendrogram( - 2^assay_data[[assay_type]][variable_genes, ], - observations[, iv, drop = FALSE], - colorby = iv, - cor_method = params$exploratory_cor_method, - plot_title = paste0( - paste0(params$observations_type," clustering dendrogram, "), - ifelse(params$exploratory_n_features == -1, nrow(assay_data[[assay_type]]), paste0(params$exploratory_n_features, " most variable")), " ", - params$features_type, - "s\n(", params$exploratory_clustering_method, " clustering, ", params$exploratory_cor_method, " correlation)"), - cluster_method = params$exploratory_clustering_method, - palette = dendroColorScale, - labelspace = 0.25 - ) - # Defaults in shinyngs make the text in this plot a bit big for the report, so - # scale it down a bit - print(p, 
vp=grid::viewport(gp=grid::gpar(cex=0.7))) - cat("\n") - } + for (iv in informative_variables){ + cat(paste0("\n##### ", prettifyVariablename(assay_type), " (", iv, ")\n")) + variable_genes <- selectVariableGenes(matrix = assay_data[[assay_type]], ntop = ifelse(params$exploratory_n_features == -1, nrow(assay_data[[assay_type]]), params$exploratory_n_features)) + + dendroColorScale <- makeColorScale(length(unique(observations[[iv]])), palette = params$exploratory_palette_name) + p <- clusteringDendrogram( + 2^assay_data[[assay_type]][variable_genes, ], + observations[, iv, drop = FALSE], + colorby = iv, + cor_method = params$exploratory_cor_method, + plot_title = paste0( + paste0(params$observations_type," clustering dendrogram, "), + ifelse(params$exploratory_n_features == -1, nrow(assay_data[[assay_type]]), paste0(params$exploratory_n_features, " most variable")), " ", + params$features_type, + "s\n(", params$exploratory_clustering_method, " clustering, ", params$exploratory_cor_method, " correlation)"), + cluster_method = params$exploratory_clustering_method, + palette = dendroColorScale, + labelspace = 0.25 + ) + # Defaults in shinyngs make the text in this plot a bit big for the report, so + # scale it down a bit + print(p, vp=grid::viewport(gp=grid::gpar(cex=0.7))) + cat("\n") + } } ``` @@ -675,40 +730,40 @@ if (any(iv_min_group_sizes > 2)){ } foo <- lapply(informative_variables[iv_min_group_sizes > 2], function(iv){ - - cat(paste("\n####", iv, "\n")) - - plotdata <- - madScore( - matrix = assay_data[[params$exploratory_final_assay]], - sample_sheet = observations, - groupby = iv - ) - - if (! 
is.null(plotdata)){ - mad_plot_args <- list( - x = plotdata$group, - y = plotdata$mad, - color = plotdata$outlier, - hline_thresholds = c("Outlier threshold" = params$exploratory_mad_threshold), - palette = makeColorScale(2, palette = params$differential_palette_name), - legend_title = "Outlier status", - labels = rownames(plotdata), - show_labels = TRUE, - xlab = "Sample group", - ylab = "MAD score" - ) - print(htmltools::tagList(do.call("plotly_scatterplot", mad_plot_args))) + cat(paste("\n####", iv, "\n")) - outliers <- rownames(plotdata)[plotdata$outlier] + plotdata <- + madScore( + matrix = assay_data[[params$exploratory_final_assay]], + sample_sheet = observations, + groupby = iv + ) - if (length(outliers) == 0){ - cat(paste0("No outlying samples were detected in groups defined by ", iv,".\n")) - }else{ - cat(paste0(length(outliers), ' possible outliers were detected in groups defined by ', iv ,': ', paste(outliers, collapse=', '), "\n")) + if (! is.null(plotdata)){ + mad_plot_args <- list( + x = plotdata$group, + y = plotdata$mad, + color = plotdata$outlier, + hline_thresholds = c("Outlier threshold" = params$exploratory_mad_threshold), + palette = makeColorScale(2, palette = params$differential_palette_name), + legend_title = "Outlier status", + labels = rownames(plotdata), + show_labels = TRUE, + xlab = "Sample group", + ylab = "MAD score" + ) + + print(htmltools::tagList(do.call("plotly_scatterplot", mad_plot_args))) + + outliers <- rownames(plotdata)[plotdata$outlier] + + if (length(outliers) == 0){ + cat(paste0("No outlying samples were detected in groups defined by ", iv,".\n")) + }else{ + cat(paste0(length(outliers), ' possible outliers were detected in groups defined by ', iv ,': ', paste(outliers, collapse=', '), "\n")) + } } - } }) ``` @@ -725,26 +780,26 @@ cat(paste0( ```{r, echo=FALSE, results='asis'} foo <- lapply(names(p_value_types), function(pvt){ - cat("\n#### ", pvt, "\n") - print( htmltools::tagList(datatable(differential_tables[[pvt]], 
caption = paste0('Differential ', params$features_type, " ", params$abundance_type, ' (target relative to reference)'), options = list(dom = 't'), rownames = TRUE) )) - cat("\n") + cat("\n#### ", pvt, "\n") + print( htmltools::tagList(datatable(differential_tables[[pvt]], caption = paste0('Differential ', params$features_type, " ", params$abundance_type, ' (target relative to reference)'), options = list(dom = 't'), rownames = TRUE) )) + cat("\n") }) ``` ```{r, echo=FALSE, results='asis', eval = FALSE} differential_summary_string <- paste( - paste( - lapply( - 1:nrow(contrasts), - function(x){ - paste0( - "Contrast ", x, ' (', contrast_descriptions[x], ') ', "had ", differential_table[x,'up'], ' ', paste0(params$features_type, 's'), ' expressed significantly more highly in ', contrasts[x, 'target',], ' than ', contrasts[x, 'reference',], ' and ', differential_table[x,'down'], ' expressed at sifnificantly lower levels.' - ) - } - ), - collapse = ' ' - ) + paste( + lapply( + 1:nrow(contrasts), + function(x){ + paste0( + "Contrast ", x, ' (', contrast_descriptions[x], ') ', "had ", differential_table[x,'up'], ' ', paste0(params$features_type, 's'), ' expressed significantly more highly in ', contrasts[x, 'target',], ' than ', contrasts[x, 'reference',], ' and ', differential_table[x,'down'], ' expressed at sifnificantly lower levels.' + ) + } + ), + collapse = ' ' + ) ) cat(differential_summary_string) ``` @@ -753,121 +808,156 @@ cat(differential_summary_string) ```{r, echo=FALSE, results='asis'} for (i in 1:nrow(contrasts)){ - cat("\n#### ", contrast_descriptions[i], " {.tabset}\n") - - ## Make a volcano plot for the contrast first - - # Label features with symbol as well as identifier - if (! is.null(params$features) && (! 
is.null(params$differential_feature_name_column)) ){ - label_col <- params$differential_feature_name_column - }else{ - label_col <- params$differential_feature_id_column - } - - # Get the full set of differential stats for this contrast, removing rows with - # NAs in the fields we need. - full_de <- differential_results[[i]] - full_de <- subset(full_de, (! is.na(full_de[[params$differential_fc_column]])) & (! is.na(full_de[[params$differential_qval_colum]])) ) - - # We'll color by whether features are differential according to supplied thresholds - - p_value_types <- list(Adjusted = params$differential_qval_column, Unadjusted = params$differential_pval_column) - p_value_thresholds <- list(Adjusted = params$differential_max_qval, Unadjusted = params$differential_max_pval) - - for (pvt in names(p_value_types)){ - cat("\n##### ", pvt, " p values\n") - pval_column <- p_value_types[[pvt]] - - - de_fc <- abs(full_de[[params$differential_fc_column]]) >= log2(params$differential_min_fold_change) - de_fc_label <- paste("abs(logFC) >=", params$differential_min_fold_change) - - de_pval <- full_de[[pval_column]] <= p_value_thresholds[[pvt]] - de_pval_label <- paste(pvt, "<=", p_value_thresholds[[pvt]]) - - de_pval_fc_label <- paste(de_fc_label, '&', de_pval_label) - - full_de$differential_status <- "Not significant" - full_de$differential_status[de_fc] <- de_fc_label - full_de$differential_status[de_pval] <- de_pval_label - full_de$differential_status[de_fc & de_pval] <- de_pval_fc_label - full_de$differential_status <- factor(full_de$differential_status, levels = c("Not significant", de_fc_label, de_pval_label, de_pval_fc_label), ordered = TRUE) # Factorize status so that non-significant is always first - # Define the thresholds we'll draw - - hline_thresholds = vline_thresholds = list() - hline_thresholds[[paste(pval_column, '=', p_value_thresholds[[pvt]])]] = -log10(p_value_thresholds[[pvt]]) - vline_thresholds[[paste(params$differential_fc_column, '<=', 
log2(params$differential_min_fold_change))]] = -log2(params$differential_min_fold_change) - vline_thresholds[[paste(params$differential_fc_column, '>=', log2(params$differential_min_fold_change))]] = log2(params$differential_min_fold_change) - - palette_volcano <- append(c('#999999'), makeColorScale(3, params$differential_palette_name)) # set non-significant to gray - - plot_args <- list( - x = full_de[[params$differential_fc_column]], - y = -log10(full_de[[pval_column]]), - colorby = full_de$differential_status, - ylab = paste("-log(10)", pval_column), - xlab = xlabel <- paste("higher in", contrasts$reference[i], " <<", params$differential_fc_column, ">> higher in", contrasts$target[i]), - labels = full_de[[label_col]], - hline_thresholds = hline_thresholds, - vline_thresholds = vline_thresholds, - show_labels = FALSE, - legend_title = "Differential status", - palette = palette_volcano - ) - - # Let's equalize the axes - max_fc <- max(abs(full_de[[params$differential_fc_column]])) * 1.1 - p <- do.call(plotly_scatterplot, plot_args) %>% - layout(xaxis = list(range=list(-max_fc, max_fc))) - - print(htmltools::tagList(p)) - - ## ... 
then show tables of the up/ down genes - - for (dir in c('up', 'down')){ - contrast_de <- sig_differential[[pvt]][[i]][[dir]] - cols_to_round <- c(params$differential_fc_column, params$differential_pval_column, params$differential_qval_column) - contrast_de[, cols_to_round] <- signif(contrast_de[, cols_to_round], 8) - - colnames(contrast_de) <- prettifyVariablename(colnames(contrast_de)) - - if (nrow(contrast_de) > 0){ - print( htmltools::tagList(datatable(contrast_de, caption = paste('Differential genes', dir, 'in', contrast_descriptions[i], " (check", differential_files[[i]], "for more detail)"), rownames = FALSE) )) - }else{ - cat(paste0("No significantly differential '", dir, "' genes.\n\n")) - } + cat("\n#### ", contrast_descriptions[i], " {.tabset}\n") + + ## Make a volcano plot for the contrast first + + # Label features with symbol as well as identifier + if (! is.null(params$features) && (! is.null(params$differential_feature_name_column)) ){ + label_col <- params$differential_feature_name_column + }else{ + label_col <- params$differential_feature_id_column + } + + # Get the full set of differential stats for this contrast, removing rows with + # NAs in the fields we need. + full_de <- differential_results[[i]] + full_de <- subset(full_de, (! is.na(full_de[[params$differential_fc_column]])) & (! 
is.na(full_de[[params$differential_qval_colum]])) ) + + # We'll color by whether features are differential according to supplied thresholds + + p_value_types <- list(Adjusted = params$differential_qval_column, Unadjusted = params$differential_pval_column) + p_value_thresholds <- list(Adjusted = params$differential_max_qval, Unadjusted = params$differential_max_pval) + + for (pvt in names(p_value_types)){ + cat("\n##### ", pvt, " p values\n") + pval_column <- p_value_types[[pvt]] + + de_fc <- abs(full_de[[params$differential_fc_column]]) >= log2(params$differential_min_fold_change) + de_fc_label <- paste("abs(logFC) >=", log2(params$differential_min_fold_change)) + + de_pval <- full_de[[pval_column]] <= p_value_thresholds[[pvt]] + de_pval_label <- paste(pvt, "<=", p_value_thresholds[[pvt]]) + + de_pval_fc_label <- paste(de_fc_label, '&', de_pval_label) + + full_de$differential_status <- "Not significant" + full_de$differential_status[de_fc] <- de_fc_label + full_de$differential_status[de_pval] <- de_pval_label + full_de$differential_status[de_fc & de_pval] <- de_pval_fc_label + full_de$differential_status <- factor(full_de$differential_status, levels = c("Not significant", de_fc_label, de_pval_label, de_pval_fc_label), ordered = TRUE) # Factorize status so that non-significant is always first + # Define the thresholds we'll draw + + hline_thresholds = vline_thresholds = list() + hline_thresholds[[paste(pval_column, '=', p_value_thresholds[[pvt]])]] = -log10(p_value_thresholds[[pvt]]) + vline_thresholds[[paste(params$differential_fc_column, '<=', log2(params$differential_min_fold_change))]] = -log2(params$differential_min_fold_change) + vline_thresholds[[paste(params$differential_fc_column, '>=', log2(params$differential_min_fold_change))]] = log2(params$differential_min_fold_change) + + palette_volcano <- append(c('#999999'), makeColorScale(3, params$differential_palette_name)) # set non-significant to gray + + plot_args <- list( + x = 
full_de[[params$differential_fc_column]], + y = -log10(full_de[[pval_column]]), + colorby = full_de$differential_status, + ylab = paste("-log(10)", pval_column), + xlab = xlabel <- paste("higher in", contrasts$reference[i], " <<", params$differential_fc_column, ">> higher in", contrasts$target[i]), + labels = full_de[[label_col]], + hline_thresholds = hline_thresholds, + vline_thresholds = vline_thresholds, + show_labels = FALSE, + legend_title = "Differential status", + palette = palette_volcano + ) + + # Let's equalize the axes + max_fc <- max(abs(full_de[[params$differential_fc_column]])) * 1.1 + + # Print warning if any p values are 0 + zero_p <- length(which(full_de[[pval_column]]==0)) + if (zero_p) { + cat(paste0("", zero_p, " feature", ifelse(zero_p>1, "s are", " is"), " not shown because of p value = 0; please refer to the results tables.

")) + } + + p <- do.call(plotly_scatterplot, plot_args) %>% + layout(xaxis = list(range=list(-max_fc, max_fc))) + + print(htmltools::tagList(p)) + + ## ... then show tables of the up/ down genes + + for (dir in c('up', 'down')){ + contrast_de <- sig_differential[[pvt]][[i]][[dir]] + cols_to_round <- c(params$differential_fc_column, params$differential_pval_column, params$differential_qval_column) + contrast_de[, cols_to_round] <- signif(contrast_de[, cols_to_round], 8) + + colnames(contrast_de) <- prettifyVariablename(colnames(contrast_de)) + + if (nrow(contrast_de) > 0){ + contrast_de <- round_dataframe_columns(contrast_de, digits=params$report_round_digits) + print( htmltools::tagList(datatable(contrast_de, caption = paste('Differential genes', dir, 'in', contrast_descriptions[i], " (check", differential_files[[i]], "for more detail)"), rownames = FALSE) )) + }else{ + cat(paste0("No significantly differential '", dir, "' genes.\n\n")) + } + } } - } - } ``` ```{r, echo=FALSE, results='asis'} -possible_gene_set_methods <- c('gsea') +possible_gene_set_methods <- c('gsea', 'gprofiler2') if (any(unlist(params[paste0(possible_gene_set_methods, '_run')]))){ - cat("\n### Gene set analysis\n") - - for (gene_set_method in possible_gene_set_methods){ - if (unlist(params[paste0(gene_set_method, '_run')])){ - cat("\n#### ", toupper(gene_set_method) ," {.tabset}\n") - - for (gmt_file in simpleSplit(params$gsea_gene_sets)) { - gmt_name <- basename(tools::file_path_sans_ext(gmt_file)) - cat("\n##### ", gmt_name ," {.tabset}\n") - reference_gsea_tables <- paste0(contrasts$id, ".", gmt_name, '.gsea_report_for_', contrasts$reference, '.tsv') - target_gsea_tables <- paste0(contrasts$id, ".", gmt_name, '.gsea_report_for_', contrasts$target, '.tsv') - for (i in 1:nrow(contrasts)){ - cat("\n###### ", contrast_descriptions[i], "\n") - target_gsea_results <- read_metadata(target_gsea_tables[i])[,c(-2,-3)] - print( htmltools::tagList(datatable(target_gsea_results, caption = 
paste0("\nTarget (", contrasts$target[i], ")\n"), rownames = FALSE) )) - ref_gsea_results <- read_metadata(reference_gsea_tables[i])[,c(-2,-3)] - print( htmltools::tagList(datatable(ref_gsea_results, caption = paste0("\nReference (", contrasts$reference[i], ")\n"), rownames = FALSE) )) + cat("\n### Gene set analysis\n") + + for (gene_set_method in possible_gene_set_methods){ + if (unlist(params[paste0(gene_set_method, '_run')])){ + cat("\n#### ", toupper(gene_set_method) ," {.tabset}\n") + if (gene_set_method == 'gsea') { + for (gmt_file in simpleSplit(params$gene_sets_files)) { + gmt_name <- basename(tools::file_path_sans_ext(gmt_file)) + cat("\n##### ", gmt_name ," {.tabset}\n") + + reference_gsea_tables <- paste0(contrasts$id, ".", gmt_name, '.gsea_report_for_', contrasts$reference, '.tsv') + target_gsea_tables <- paste0(contrasts$id, ".", gmt_name, '.gsea_report_for_', contrasts$target, '.tsv') + for (i in 1:nrow(contrasts)){ + cat("\n###### ", contrast_descriptions[i], "\n") + target_gsea_results <- read_metadata(target_gsea_tables[i])[,c(-2,-3)] + target_gsea_results <- round_dataframe_columns(target_gsea_results, digits=params$report_round_digits) + print( htmltools::tagList(datatable(target_gsea_results, caption = paste0("\nTarget (", contrasts$target[i], ")\n"), rownames = FALSE) )) + ref_gsea_results <- read_metadata(reference_gsea_tables[i])[,c(-2,-3)] + ref_gsea_results <- round_dataframe_columns(ref_gsea_results, digits=params$report_round_digits) + print( htmltools::tagList(datatable(ref_gsea_results, caption = paste0("\nReference (", contrasts$reference[i], ")\n"), rownames = FALSE) )) + } + } + + } else if (gene_set_method == 'gprofiler2') { + + cat(paste0("\nThis section contains the results tables of the pathway analysis which was done with the R package gprofiler2. The differential fraction is the number of differential genes in a pathway divided by that pathway's size, i.e. 
the number of genes annotated for the pathway.", + ifelse(params$gprofiler2_significant, paste0(" Enrichment was only considered if significant, i.e. adjusted p-value <= ", params$gprofiler2_max_qval, "."), "Enrichment was also considered if not significant."), "\n")) + + # Make sure to grab only non-empty files + for (i in 1:nrow(contrasts)) { + cat(paste0("\n##### ", contrasts$id[i], "\n")) + + table <- paste0(contrasts$id[i], ".gprofiler2.all_enriched_pathways.tsv") + table_path <- file.path(params$input_dir, table) + if (!file.exists(table_path) || file.size(table_path) == 0){ + cat(paste0("No ", ifelse(params$gprofiler2_significant, "significantly", ""), " enriched pathways were found for this contrast.")) + } else { + all_enriched <- read.table(table_path, header=T, sep="\t", quote="\"") + all_enriched <- data.frame("Pathway name" = all_enriched$term_name, "Pathway code" = all_enriched$term_id, + "Differential features" = all_enriched$intersection_size, "Pathway size" = all_enriched$term_size, + "Differential fraction" = (all_enriched$intersection_size/all_enriched$term_size), + "Adjusted p value" = all_enriched$p_value, check.names = FALSE) + all_enriched <- round_dataframe_columns(all_enriched, digits=params$report_round_digits) + print(htmltools::tagList(datatable(all_enriched, caption = paste('Enriched pathways in', contrasts$id[i], " (check", table, "for more detail)"), rownames = FALSE))) + } + cat("\n") + } + } } - } } - } } ``` @@ -888,17 +978,17 @@ make_params_table('feature-wise filtering', 'filtering_', remove_pattern = TRUE) filtering_string <- paste0('Filtering was carried out by selecting ', params$features_type, 's with an abundance of at least ', params$filtering_min_abundance) if (is.null(params$filtering_grouping_var)){ - if (is.null(params$filtering_min_proportion)){ - filtering_string <- paste0(filtering_string, ' in at least ', params$filtering_min_samples, ' ', params$observations_type, 's.') - }else{ - filtering_string <- 
paste0(filtering_string, ' in at least a proportion of ', params$filtering_min_proportion, ' of ', params$observations_type,'s.') - } + if (is.null(params$filtering_min_proportion)){ + filtering_string <- paste0(filtering_string, ' in at least ', params$filtering_min_samples, ' ', params$observations_type, 's.') + }else{ + filtering_string <- paste0(filtering_string, ' in at least a proportion of ', params$filtering_min_proportion, ' of ', params$observations_type,'s.') + } }else{ - if (is.null(params$filtering_min_proportion)){ - filtering_string <- paste0(filtering_string, ' in at least the number of ', params$observations_type, 's corresponding to the smallest group size defined by the grouping variable "', params$filtering_grouping_var, '".') - }else{ - filtering_string <- paste0(filtering_string, ' in at least a proportion of ', params$filtering_min_proportion, ' of the number of ', params$observations_type,'s corresponding to the smallest group size defined by the grouping variable"', params$filtering_grouping_var, '".') - } + if (is.null(params$filtering_min_proportion)){ + filtering_string <- paste0(filtering_string, ' in at least the number of ', params$observations_type, 's corresponding to the smallest group size defined by the grouping variable "', params$filtering_grouping_var, '".') + }else{ + filtering_string <- paste0(filtering_string, ' in at least a proportion of ', params$filtering_min_proportion, ' of the number of ', params$observations_type,'s corresponding to the smallest group size defined by the grouping variable"', params$filtering_grouping_var, '".') + } } cat(filtering_string) ``` @@ -914,7 +1004,7 @@ make_params_table('exploratory analysis', 'exploratory_', remove_pattern = TRUE) ```{r, echo=FALSE, results='asis'} if (params$study_type == 'rnaseq'){ - make_params_table('DESeq2', 'deseq2_', remove_pattern = TRUE) + make_params_table('DESeq2', 'deseq2_', remove_pattern = TRUE) } make_params_table('downstream differential analysis', 
'differential_', remove_pattern = TRUE) ``` @@ -922,17 +1012,17 @@ make_params_table('downstream differential analysis', 'differential_', remove_pa ```{r, echo=FALSE, results='asis'} -possible_gene_set_methods <- c('gsea') +possible_gene_set_methods <- c('gsea', 'gprofiler2') if (any(unlist(params[paste0(possible_gene_set_methods, '_run')]))){ - cat("\n### Gene set analysis\n") + cat("\n### Gene set analysis\n") - for (gene_set_method in possible_gene_set_methods){ - if (unlist(params[paste0(gene_set_method, '_run')])){ - cat("\n#### ", toupper(gene_set_method) ," {.tabset}\n") - make_params_table(toupper(gene_set_method), paste0(gene_set_method, '_'), remove_pattern = TRUE) + for (gene_set_method in possible_gene_set_methods){ + if (unlist(params[paste0(gene_set_method, '_run')])){ + cat("\n#### ", toupper(gene_set_method) ," {.tabset}\n") + make_params_table(toupper(gene_set_method), paste0(gene_set_method, '_'), remove_pattern = TRUE) + } } - } } ``` @@ -957,4 +1047,4 @@ print( htmltools::tagList(datatable(versions_table, caption = "Software versions ```{r, echo=FALSE, results='asis'} htmltools::includeMarkdown(params$citations) -``` \ No newline at end of file +``` diff --git a/assets/email_template.html b/assets/email_template.html index 0cac8291..38ec166e 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -12,7 +12,7 @@ -

nf-core/differentialabundance v${version}

+

nf-core/differentialabundance ${version}

Run Name: $runName

<% if (!success){ diff --git a/assets/email_template.txt b/assets/email_template.txt index 58f0b039..44681e48 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,7 +4,7 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/differentialabundance v${version} + nf-core/differentialabundance ${version} ---------------------------------------------------- Run Name: $runName diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index f921df86..bc8bed92 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,5 @@ report_comment: > - This report has been generated by the nf-core/differentialabundance - analysis pipeline. For information about how to interpret these results, please see the - documentation. + This report has been generated by the nf-core/differentialabundance analysis pipeline. For information about how to interpret these results, please see the documentation. report_section_order: "nf-core-differentialabundance-methods-description": order: -1000 @@ -11,3 +9,5 @@ report_section_order: order: -1002 export_plots: true + +disable_version_detection: true diff --git a/assets/nf-core-differentialabundance_logo_light.png b/assets/nf-core-differentialabundance_logo_light.png index 5ab625ef..0639e2ae 100644 Binary files a/assets/nf-core-differentialabundance_logo_light.png and b/assets/nf-core-differentialabundance_logo_light.png differ diff --git a/assets/nf-core_style.css b/assets/nf-core_style.css index 0195a723..240fe2e4 100644 --- a/assets/nf-core_style.css +++ b/assets/nf-core_style.css @@ -68,3 +68,14 @@ a { color: #24b064; text-decoration: none; } + +.div-row { + display: flex; + margin-bottom: 25px; +} + +.div-column { + flex: 1; + display: inline-block; + box-sizing: border-box; +} diff --git a/assets/slackreport.json b/assets/slackreport.json index 1e44b419..4be7003e 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ 
{ "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "nf-core/differentialabundance v${version} - ${runName}", + "author_name": "nf-core/differentialabundance ${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py deleted file mode 100755 index 4a758fe0..00000000 --- a/bin/check_samplesheet.py +++ /dev/null @@ -1,259 +0,0 @@ -#!/usr/bin/env python - - -"""Provide a command line tool to validate and transform tabular samplesheets.""" - - -import argparse -import csv -import logging -import sys -from collections import Counter -from pathlib import Path - -logger = logging.getLogger() - - -class RowChecker: - """ - Define a service that can validate and transform each given row. - - Attributes: - modified (list): A list of dicts, where each dict corresponds to a previously - validated and transformed row. The order of rows is maintained. - - """ - - VALID_FORMATS = ( - ".fq.gz", - ".fastq.gz", - ) - - def __init__( - self, - sample_col="sample", - first_col="fastq_1", - second_col="fastq_2", - single_col="single_end", - **kwargs, - ): - """ - Initialize the row checker with the expected column names. - - Args: - sample_col (str): The name of the column that contains the sample name - (default "sample"). - first_col (str): The name of the column that contains the first (or only) - FASTQ file path (default "fastq_1"). - second_col (str): The name of the column that contains the second (if any) - FASTQ file path (default "fastq_2"). - single_col (str): The name of the new column that will be inserted and - records whether the sample contains single- or paired-end sequencing - reads (default "single_end"). 
- - """ - super().__init__(**kwargs) - self._sample_col = sample_col - self._first_col = first_col - self._second_col = second_col - self._single_col = single_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row and insert the read pairing status. - - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). - - """ - self._validate_sample(row) - self._validate_first(row) - self._validate_second(row) - self._validate_pair(row) - self._seen.add((row[self._sample_col], row[self._first_col])) - self.modified.append(row) - - def _validate_sample(self, row): - """Assert that the sample name exists and convert spaces to underscores.""" - if len(row[self._sample_col]) <= 0: - raise AssertionError("Sample input is required.") - # Sanitize samples slightly. - row[self._sample_col] = row[self._sample_col].replace(" ", "_") - - def _validate_first(self, row): - """Assert that the first FASTQ entry is non-empty and has the right format.""" - if len(row[self._first_col]) <= 0: - raise AssertionError("At least the first FASTQ file is required.") - self._validate_fastq_format(row[self._first_col]) - - def _validate_second(self, row): - """Assert that the second FASTQ entry has the right format if it exists.""" - if len(row[self._second_col]) > 0: - self._validate_fastq_format(row[self._second_col]) - - def _validate_pair(self, row): - """Assert that read pairs have the same file extension. 
Report pair status.""" - if row[self._first_col] and row[self._second_col]: - row[self._single_col] = False - first_col_suffix = Path(row[self._first_col]).suffixes[-2:] - second_col_suffix = Path(row[self._second_col]).suffixes[-2:] - if first_col_suffix != second_col_suffix: - raise AssertionError("FASTQ pairs must have the same file extensions.") - else: - row[self._single_col] = True - - def _validate_fastq_format(self, filename): - """Assert that a given filename has one of the expected FASTQ extensions.""" - if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): - raise AssertionError( - f"The FASTQ file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) - - def validate_unique_samples(self): - """ - Assert that the combination of sample name and FASTQ filename is unique. - - In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the - number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. - - """ - if len(self._seen) != len(self.modified): - raise AssertionError("The pair of sample name and FASTQ must be unique.") - seen = Counter() - for row in self.modified: - sample = row[self._sample_col] - seen[sample] += 1 - row[self._sample_col] = f"{sample}_T{seen[sample]}" - - -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) - - -def sniff_format(handle): - """ - Detect the tabular format. - - Args: - handle (text file): A handle to a `text file`_ object. The read position is - expected to be at the beginning (index 0). - - Returns: - csv.Dialect: The detected tabular format. - - .. 
_text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - dialect = sniffer.sniff(peek) - return dialect - - -def check_samplesheet(file_in, file_out): - """ - Check that the tabular samplesheet has the structure expected by nf-core pipelines. - - Validate the general shape of the table, expected columns, and each row. Also add - an additional column which records whether one or two FASTQ reads were found. - - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. - file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. - - Example: - This function checks that the samplesheet follows the following structure, - see also the `viral recon samplesheet`_:: - - sample,fastq_1,fastq_2 - SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz - SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz - SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, - - .. _viral recon samplesheet: - https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv - - """ - required_columns = {"sample", "fastq_1", "fastq_2"} - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. - if not required_columns.issubset(reader.fieldnames): - req_cols = ", ".join(required_columns) - logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") - sys.exit(1) - # Validate each row. 
- checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - checker.validate_unique_samples() - header = list(reader.fieldnames) - header.insert(1, "single_end") - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - return parser.parse_args(argv) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/conf/affy.config b/conf/affy.config index 7d953d3b..d0ceb7a3 100644 --- a/conf/affy.config +++ b/conf/affy.config @@ -27,6 +27,7 @@ params { // Exploratory 
exploratory_assay_names = 'raw,normalised' exploratory_final_assay = 'normalised' + exploratory_log2_assays = null // Differential options differential_file_suffix = ".limma.results.tsv" diff --git a/conf/base.config b/conf/base.config index cbc9311c..99e804ea 100644 --- a/conf/base.config +++ b/conf/base.config @@ -36,7 +36,7 @@ process { } withLabel:process_medium { cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 2.GB * task.attempt, 'memory' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } time = { check_max( 8.h * task.attempt, 'time' ) } } withLabel:process_high { @@ -57,7 +57,4 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } } diff --git a/conf/maxquant.config b/conf/maxquant.config index ad782b8a..8adf03c8 100644 --- a/conf/maxquant.config +++ b/conf/maxquant.config @@ -28,6 +28,7 @@ params { // Exploratory exploratory_assay_names = "raw,normalised" exploratory_final_assay = "normalised" + exploratory_log2_assays = null // Differential options differential_file_suffix = ".limma.results.tsv" @@ -38,7 +39,7 @@ params { differential_feature_name_column = "Majority protein IDs" // Proteus options - proteus_measurecol_prefix = 'LFQ intensity ' + proteus_measurecol_prefix = 'LFQ intensity' // Shiny does not work for this datatype shinyngs_build_app = false diff --git a/conf/modules.config b/conf/modules.config index 8b3b66e7..9313457d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -62,12 +62,11 @@ process { ] ext.prefix = { "raw." } ext.args = { [ - "--sample_name_col \"${params.observations_name_col}\"", + "--sample_name_col \"" + (params.observations_name_col == null ? 
params.observations_id_col : params.observations_name_col) + "\"", "--file_name_col \"${params.affy_file_name_col}\"", "--background ${params.affy_background}", "--normalize False", "--bgversion ${params.affy_bgversion}", - "--file_name_col \"${params.affy_file_name_col}\"", "--destructive ${params.affy_destructive}", "--rm.mask ${params.affy_rm_mask}", "--rm.outliers ${params.affy_rm_outliers}", @@ -86,12 +85,11 @@ process { ] ext.prefix = { "normalised." } ext.args = { [ - "--sample_name_col \"${params.observations_name_col}\"", + "--sample_name_col \"" + (params.observations_name_col == null ? params.observations_id_col : params.observations_name_col) + "\"", "--file_name_col \"${params.affy_file_name_col}\"", "--background ${params.affy_background}", "--normalize True", "--bgversion ${params.affy_bgversion}", - "--file_name_col \"${params.affy_file_name_col}\"", "--destructive ${params.affy_destructive}", "--rm.mask ${params.affy_rm_mask}", "--rm.outliers ${params.affy_rm_outliers}", @@ -133,7 +131,7 @@ process { "--plotsd_method $params.proteus_plotsd_method", "--plotmv_loess $params.proteus_plotmv_loess", "--palette_name $params.proteus_palette_name", - "--round_digits $params.proteus_round_digits" + "--round_digits $params.report_round_digits" ].join(' ').trim() } } @@ -171,6 +169,7 @@ process { ], ] ext.args = { [ + "--gene_id_col \"${params.features_id_col}\"", "--sample_id_col \"${params.observations_id_col}\"", "--test $params.deseq2_test", "--fit_type $params.deseq2_fit_type", @@ -281,18 +280,24 @@ process { ].join(' ').trim() } } - withName: GSEA_GSEA { - ext.prefix = { "${meta.id}.${gene_sets.baseName}." 
} + withName: FILTER_DIFFTABLE { + ext.prefix = { "${meta.id}" } publishDir = [ [ - path: { "${params.outdir}/tables/gsea/${meta.id}/${gene_sets.baseName}" }, + path: { "${params.outdir}/tables/differential" }, mode: params.publish_dir_mode, - pattern: '*gsea_report_for_*.tsv' - ], + pattern: '*_filtered.tsv' + ] + ] + } + + withName: GSEA_GSEA { + ext.prefix = { "${meta.id}.${gene_sets.baseName}." } + publishDir = [ [ - path: { "${params.outdir}/plots/gsea/${meta.id}/${gene_sets.baseName}" }, + path: { "${params.outdir}/report/gsea/${meta.id}/${gene_sets.baseName}" }, mode: params.publish_dir_mode, - pattern: '*.png' + pattern: '*.{html,zip,png,tsv,rpt}' ] ] ext.args = { [ @@ -316,6 +321,47 @@ process { ].join(' ').trim() } } + withName: GPROFILER2_GOST { + publishDir = [ + [ + path: { "${params.outdir}/tables/gprofiler2/${meta.id}/" }, + mode: params.publish_dir_mode, + pattern: '*.tsv' + ], + [ + path: { "${params.outdir}/plots/gprofiler2/${meta.id}/" }, + mode: params.publish_dir_mode, + pattern: '*.{png,html}' + ], + [ + path: { "${params.outdir}/other/gprofiler2/${meta.id}/" }, + mode: params.publish_dir_mode, + pattern: '*.{rds,gmt}' + ], + [ + path: { "${params.outdir}/other/gprofiler2/" }, + mode: params.publish_dir_mode, + pattern: '*.{rds,sessionInfo.log}' + ] + ] + ext.args = { [ + "--significant \"${params.gprofiler2_significant}\"", + "--measure_underrepresentation \"${params.gprofiler2_measure_underrepresentation}\"", + "--correction_method \"${params.gprofiler2_correction_method}\"", + "--evcodes \"${params.gprofiler2_evcodes}\"", + "--pval_threshold \"${params.gprofiler2_max_qval}\"", + "--domain_scope ${params.gprofiler2_domain_scope}", + "--min_diff \"${params.gprofiler2_min_diff}\"", + "--round_digits ${params.report_round_digits}", + "--palette_name \"${params.gprofiler2_palette_name}\"", + ((params.differential_feature_id_column == null) ? 
'' : "--de_id_column \"${params.differential_feature_id_column}\""), + ((params.gprofiler2_token == null) ? '' : "--token \"${params.gprofiler2_token}\""), + ((params.gprofiler2_organism == null) ? '' : "--organism \"${params.gprofiler2_organism}\""), + ((params.gprofiler2_background_column == null) ? '' : "--background_column \"${params.gprofiler2_background_column}\""), + ((params.gprofiler2_sources == null) ? '' : "--sources \"${params.gprofiler2_sources}\"") + ].join(' ').trim() } + } + withName: PLOT_EXPLORATORY { publishDir = [ path: { "${params.outdir}/plots/exploratory" }, @@ -329,7 +375,7 @@ process { "--final_assay \"${params.exploratory_final_assay}\"", "--outlier_mad_threshold ${params.exploratory_mad_threshold}", "--palette_name \"${params.exploratory_palette_name}\"", - ( (params.study_type == 'maxquant') ? "--log2_assays ''" : (((params.features_log2_assays == null) ? '' : "--log2_assays \"$params.features_log2_assays\"".replace('[', '').replace(']', ''))) ) + ( (params.study_type == 'maxquant') ? "--log2_assays ''" : (((params.exploratory_log2_assays == null) ? '' : "--log2_assays \"$params.exploratory_log2_assays\"".replace('[', '').replace(']', ''))) ) ].join(' ').trim() } } @@ -365,7 +411,7 @@ process { "--assay_names \"${params.exploratory_assay_names}\"", "--sample_id_col \"${params.observations_id_col}\"", "--feature_id_col \"${params.features_id_col}\"", - "--feature_name_col \"${params.features_name_col}\"", + "--feature_name_col \"" + (params.features_name_col == null ? 
params.features_id_col : params.features_name_col) + "\"", "--diff_feature_id_col \"${params.differential_feature_id_column}\"", "--fold_change_column \"${params.differential_fc_column}\"", "--pval_column \"${params.differential_pval_column}\"", @@ -381,21 +427,15 @@ process { ].join(' ').trim() } } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - pattern: '*_versions.yml' - ] - } - withName: RMARKDOWNNOTEBOOK { - conda = "bioconda::r-shinyngs=1.8.4" - container = { "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.8.4--r43hdfd78af_0' : 'biocontainers/r-shinyngs:1.8.4--r43hdfd78af_0' }" } + conda = "bioconda::r-shinyngs=1.8.8" + container = { "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.8.8--r43hdfd78af_0' : 'biocontainers/r-shinyngs:1.8.8--r43hdfd78af_0' }" } publishDir = [ - path: { "${params.outdir}/report" }, - mode: params.publish_dir_mode, - pattern: '*.html' + [ + path: { "${params.outdir}/report" }, + mode: params.publish_dir_mode, + pattern: '*.html' + ] ] } @@ -417,7 +457,9 @@ process { "--minimum_samples ${params.filtering_min_samples}", "--minimum_abundance ${params.filtering_min_abundance}", (params.filtering_min_proportion ? "--minimum_proportion ${params.filtering_min_proportion}" : ''), - (params.filtering_grouping_var ? "--grouping_variable \"${params.filtering_grouping_var}\"" : '') + (params.filtering_grouping_var ? "--grouping_variable \"${params.filtering_grouping_var}\"" : ''), + (params.filtering_min_proportion_not_na ? "--minimum_proportion_not_na \"${params.filtering_min_proportion_not_na}\"" : ''), + (params.filtering_min_samples_not_na ? 
"--minimum_samples_not_na \"${params.filtering_min_samples_not_na}\"" : '') ].join(' ').trim() } diff --git a/conf/rnaseq.config b/conf/rnaseq.config index 10427bf1..c79d4828 100644 --- a/conf/rnaseq.config +++ b/conf/rnaseq.config @@ -27,6 +27,7 @@ params { // Exploratory exploratory_assay_names = "raw,normalised,variance_stabilised" exploratory_final_assay = "variance_stabilised" + exploratory_log2_assays = 'raw,normalised' // Differential options differential_file_suffix = ".deseq2.results.tsv" diff --git a/conf/soft.config b/conf/soft.config index 5109b25d..832ebc17 100644 --- a/conf/soft.config +++ b/conf/soft.config @@ -33,6 +33,7 @@ params { // Exploratory exploratory_assay_names = 'normalised' exploratory_final_assay = 'normalised' + exploratory_log2_assays = null // Differential options differential_file_suffix = ".limma.results.tsv" diff --git a/conf/test.config b/conf/test.config index f4436b3d..4d4e9454 100644 --- a/conf/test.config +++ b/conf/test.config @@ -47,5 +47,9 @@ params { // Activate GSEA gsea_run = true - gsea_gene_sets = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt' + gene_sets_files = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt' + + // Report options + report_round_digits = 3 + report_contributors = 'Jane Doe\nDirector of Institute of Microbiology\nUniversity of Smallville;John Smith\nPhD student\nInstitute of Microbiology\nUniversity of Smallville' } diff --git a/conf/test_affy.config b/conf/test_affy.config index a8db1409..372e0577 100644 --- a/conf/test_affy.config +++ b/conf/test_affy.config @@ -42,5 +42,5 @@ params { // Activate GSEA gsea_run = true - gsea_gene_sets = 'https://raw.githubusercontent.com/nf-core/test-datasets/differentialabundance/testdata/h.all.v2022.1.Hs.symbols.gmt' + gene_sets_files = 
'https://raw.githubusercontent.com/nf-core/test-datasets/differentialabundance/testdata/h.all.v2022.1.Hs.symbols.gmt' } diff --git a/conf/test_full.config b/conf/test_full.config index cbd8e212..dcc87126 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -34,5 +34,9 @@ params { // Activate GSEA gsea_run = true - gsea_gene_sets = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt' + gene_sets_files = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt' + + // Activate gprofiler2 + gprofiler2_run = true + gprofiler2_organism = 'mmusculus' } diff --git a/docs/images/nf-core-differentialabundance_logo_dark.png b/docs/images/nf-core-differentialabundance_logo_dark.png index 02cb1ce5..cafd1c6c 100644 Binary files a/docs/images/nf-core-differentialabundance_logo_dark.png and b/docs/images/nf-core-differentialabundance_logo_dark.png differ diff --git a/docs/images/nf-core-differentialabundance_logo_light.png b/docs/images/nf-core-differentialabundance_logo_light.png index 5ab625ef..f23e51a2 100644 Binary files a/docs/images/nf-core-differentialabundance_logo_light.png and b/docs/images/nf-core-differentialabundance_logo_light.png differ diff --git a/docs/images/workflow.png b/docs/images/workflow.png index 5b73b936..21bb9011 100644 Binary files a/docs/images/workflow.png and b/docs/images/workflow.png differ diff --git a/docs/images/workflow.svg b/docs/images/workflow.svg index 21313a3c..6a53d260 100644 --- a/docs/images/workflow.svg +++ b/docs/images/workflow.svg @@ -3,11 +3,11 @@ + inkscape:export-bgcolor="#ffffffff" + showgrid="false" /> + + y="348.19925" /> + y="331.51535" /> + x="111.85512" + y="288.01227" /> + +getGEO + id="tspan5907">getGEO +justRMA + id="tspan5911">justRMA +readProteinGroups + id="tspan5915">readProteinGroups GTF to GTF to +table + id="tspan5919">table 
Validate + id="tspan5921">Validate Limma + id="tspan5923">Limma GSEA + id="tspan5925">GSEA + gprofiler2 DESeq2 + id="tspan5929">DESeq2 Filter matrix + id="tspan5931">Filter matrix + y="391.19058" /> + y="391.10669" /> GEO ID + id="tspan5933">GEO ID Maxquant Maxquant +output + id="tspan5937">output Affy Affy +intensities + id="tspan5941">intensities Reference Reference +annotation + id="tspan5947">annotation Contrast Contrast +definitions + id="tspan5953">definitions @@ -1132,10 +1164,11 @@ y="0">Feature Feature +annotations + id="tspan5957">annotations Abundance Abundance +values + id="tspan5961">values Observation Observation +annotations + id="tspan5965">annotations + + + + + + Plot exploratory + id="tspan5967">Plot exploratory Plot differential + id="tspan5969">Plot differential + R Markdown notebook @@ -1517,24 +1594,24 @@ inkscape:export-xdpi="90" inkscape:export-filename="./polygon4618.png" id="text3362-8-6" - y="325.30905" + y="378.22653" x="112.9056" style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:6.35px;line-height:1.35;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro';display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583" xml:space="preserve">Build Shiny app TSV + + + + + + + TSV + + + + + + + diff --git a/docs/output.md b/docs/output.md index 26c247ec..080bd870 100644 --- a/docs/output.md +++ b/docs/output.md @@ -38,6 +38,10 @@ Stand-alone graphical outputs are placed in this directory. They may be useful i - `[contrast]/png/volcano.png`: Volcano plots of -log(10) p value agains log(2) fold changes - `gsea/`: Directory containing graphical outputs from GSEA (where enabled). Plots are stored in directories named for the associated contrast. - `[contrast]/png/[gsea_plot_type].png` + - `gprofiler2/`: Directory containing graphical outputs from gprofiler2 (where enabled). Plots are stored in directories named for the associated contrast. 
+ - `[contrast]/[contrast].gprofiler2.[source].gostplot.html`: An interactive gprofiler2 Manhattan plot of enriched pathways from one specific source/database, e.g. REAC + - `[contrast]/[contrast].gprofiler2.[source].gostplot.png`: A static gprofiler2 Manhattan plot of enriched pathways from one specific source/database, e.g. REAC + - `[contrast]/[contrast].gprofiler2.[source].sub_enriched_pathways.png`: A gprofiler2 bar plot of enriched pathways and how strongly enriched they are from one specific source/database, e.g. REAC - `proteus/`: If `--study_type maxquant`: Directory containing plots produced by the proteus module which is used for processing MaxQuant input. Files are prefixed with the associated contrast and chosen normalization function (if any). - `[contrast]/[norm_function].normalized_dendrogram.png`: A sample clustering dendrogram after normalization. - `[contrast]/[norm_function].normalized_mean_variance_relationship.png`: Plots of log intensity vs mean log intensity after normalization of each contrast level. 
@@ -63,10 +67,13 @@ Most plots are included in the HTML report (see above), but are also included in - `raw.matrix.tsv`: RMA background corrected matrix (Affy) - `normalised.matrix.tsv`: RMA background corrected and normalised intensities matrix (Affy) - `differential/`: Directory containing tables of differential statistics reported by differential modules such as DESeq2 - - `[contrast_name].deseq2.results.tsv`: Results of DESeq2 differential analyis (RNA-seq) - - `OR [contrast_name].limma.results.tsv`: Results of Limma differential analyis (Affymetrix arrays) + - `[contrast_name].[deseq2|limma].results.tsv`: Results of DESeq2 differential analysis (RNA-seq) OR Limma differential analysis (Affymetrix arrays, GEO studies, Maxquant proteomics studies) + - `[contrast_name].[deseq2|limma].results_filtered.tsv`: Results of DESeq2 differential analysis (RNA-seq) OR Limma differential analysis (Affymetrix arrays, GEO studies, Maxquant proteomics studies); filtered for differentially abundant entries - `gsea/`: Directory containing tables of differential gene set analyis from GSEA (where enabled) - `[contrast]/[contrast].gsea_report_for_[condition].tsv`: A GSEA report table for each side of each contrast + - `gprofiler2/`: Directory containing tables of differential gene set analysis from gprofiler2 (where enabled) + - `[contrast]/[contrast].gprofiler2.all_enriched_pathways.tsv`: A gprofiler2 report table for all enrichment results + - `[contrast]/[contrast].gprofiler2.[source].sub_enriched_pathways.tsv`: A gprofiler2 report table of enriched pathways from one specific source/database, e.g. REAC - `proteus/`: If `--study_type maxquant`: Directory containing abundance values produced by the proteus module which is used for processing MaxQuant input. Files are prefixed with the associated contrast and chosen normalization function (if any). - `[contrast]/[norm_function].normalized_proteingroups_tab.tsv`: Abundance table after normalization. 
- `[contrast]/raw_proteingroups_tab.tsv`: Abundance table without normalization. diff --git a/docs/usage.md b/docs/usage.md index 8365799d..d474d611 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -23,7 +23,11 @@ With the above in mind, running this workflow requires: --input '[path to samplesheet file]' ``` -This may well be the same sample sheet used to generate the input matrix. For example, in RNA-seq this might be the same sample sheet, perhaps derived from [fetchngs](https://github.com/nf-core/fetchngs), that was input to the [RNA-seq workflow](https://github.com/nf-core/rnaseq). It may be necessary to add columns that describe the groups you want to compare. +This may well be the same sample sheet used to generate the input matrix. For example, in RNA-seq this might be the same sample sheet, perhaps derived from [fetchngs](https://github.com/nf-core/fetchngs), that was input to the [RNA-seq workflow](https://github.com/nf-core/rnaseq). It may be necessary to add columns that describe the groups you want to compare. The columns that the pipeline requires are: + +- a column listing the sample IDs (must be the same IDs as in the abundance matrix), in the example below it is called 'sample'. For some study_types, this column might need to be filled in with file names, e.g. when doing an affymetrix analysis. +- one or more columns describing conditions for the differential analysis. In the example below it is called 'condition' +- optionally one or more columns describing sample batches or similar which you want to be considered in the analysis. In the example below it is called 'batch' For example: @@ -96,7 +100,7 @@ So we **do not recommend** raw counts files such as `salmon.merged.gene_counts.t --matrix '[path to matrix file]' ``` -This is the proteinGroups.txt file produced by MaxQuant. 
It is a tab-separated matrix file with a column for every observation (plus additional columns for other types of measurements and information); each row contains these data for a set of proteins. The parameters `--observations_id_col` and `--features_id_col` define which of the associated fields should be matched in those inputs. The parameter `--proteus_measurecol_prefix` defines which prefix is used to extract those matrix columns which contain the measurements to be used. For example, the default `LFQ intensity ` will indicate that columns like LFQ intensity S1, LFQ intensity S2, LFQ intensity S3 etc. are used (do not forget trailing whitespace in this parameter, if required!). +This is the proteinGroups.txt file produced by MaxQuant. It is a tab-separated matrix file with a column for every observation (plus additional columns for other types of measurements and information); each row contains these data for a set of proteins. The parameters `--observations_id_col` and `--features_id_col` define which of the associated fields should be matched in those inputs. The parameter `--proteus_measurecol_prefix` defines which prefix is used to extract those matrix columns which contain the measurements to be used. For example, the default `LFQ intensity ` will indicate that columns like LFQ intensity S1, LFQ intensity S2, LFQ intensity S3 etc. are used (one whitespace is automatically added if necessary). ### Affymetrix microarrays @@ -269,6 +273,34 @@ With this configuration in place deployment should happen automatically every ti There is also a [Shiny server application](https://posit.co/download/shiny-server/), which you can install on your own infrastruture and use to host applications yourself. +## Gene set enrichment analysis + +Currently, two tools can be used to do gene set enrichment analysis. 
+ +### GSEA + +[GSEA](https://www.gsea-msigdb.org/gsea/index.jsp) tests for differential genes from within a user-provided set of genes; this requires a GMT or GMX file. The following example shows how to enable this: + +```bash +--gsea_run true \ +--gene_sets_files gene_sets.gmt +``` + +### g:Profiler + +The [gprofiler2](https://cran.r-project.org/web/packages/gprofiler2/vignettes/gprofiler2.html) package can be used to test which pathways are enriched in the sets of differential genes produced by the DESeq2 or limma modules. It is an R interface for the g:Profiler webtool. In the simplest form, this feature can be enabled with the parameters from the following example: + +```bash +--gprofiler2_run true \ +--gprofiler2_organism mmusculus +``` + +If gene sets have been specified to the workflow via `--gene_sets_files` these are used by default. Specifying `--gprofiler2_organism` (mmusculus for Mus musculus, hsapiens for Homo sapiens etc.) will override those gene sets with g:Profiler's own for the relevant species. `--gprofiler2_token` will override both options and use gene sets from a previous g:Profiler run. + +By default the analysis will be run with a background list of genes that passed the abundance filter (i.e. those genes that actually had some expression); see for example https://doi.org/10.1186/s13059-015-0761-7 for why this is advisable. You can provide your own background list with `--gprofiler2_background_file background.txt` or if you want to not use any background, set `--gprofiler2_background_file false`. + +Check the [pipeline webpage](https://nf-co.re/differentialabundance/parameters#gprofiler2) for a full listing of the relevant parameters. 
+ ## Running the pipeline The typical command for running the pipeline is as follows: @@ -300,6 +332,26 @@ work # Directory containing the nextflow working files - If you don't like the colors used in the report, try a different `RColorBrewer` palette by changing the `exploratory_palette_name` and/or `differential_palette_name` parameters. - In rare cases, some users have reported issues with DESeq2 using all available cores on a machine, rather than those specified in the process configuration. This can be prevented by setting the `OPENBLAS_NUM_THREADS` environment variable. +### Scaling up to large sample numbers + +#### Deactivating reporting processes + +A number of workflow steps are not optimised to deal with large sample numbers and will cause the overall workflow to fail. If you have sample numbers on the order of 100s or more, you should disable these processes like: + +``` +process { + withName:'PLOT_EXPLORATORY|PLOT_DIFFERENTIAL|RMARKDOWNNOTEBOOK|MAKE_REPORT_BUNDLE|SHINYNGS_APP'{ + ext.when = false + } +} +``` + +You will not get the final reporting outcomes of the workflow, but you will get the differential tables produced by DESeq2 or Limma, and the results of any gene set analysis you have enabled. + +#### Restricting samples considered by DESeq2 or Limma + +By default, the DESeq2 or Limma differential modules model all samples at once, rather than just the samples involved in the contrast. This is usually the correct thing to do, but when there are large numbers of samples involved in each contrast it may be unnecessary, and things can be sped up significantly by setting `--differential_subset_to_contrast_samples`. This will remove any samples not relevant to the contrast before the main differential analysis routines are called. ### Params files If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. 
diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index 01b8653d..00000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,352 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. -// - -import org.yaml.snakeyaml.Yaml -import groovy.json.JsonOutput - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Generate version string - // - public static String version(workflow) { - String version_string = "" - - if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' - version_string += "${prefix_v}${workflow.manifest.version}" - } - - if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) - version_string += "-g${git_shortsha}" - } - - return version_string - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = NfcoreTemplate.version(workflow) - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = 
summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - } - - // - // Construct and send a notification to a web server as JSON - // e.g. 
Microsoft Teams and Slack - // - public static void IM_notification(workflow, params, summary_params, projectDir, log) { - def hook_url = params.hook_url - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = NfcoreTemplate.version(workflow) - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - // Different JSON depending on the service provider - // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? 
"slackreport.json" : "adaptivecard.json" - def hf = new File("$projectDir/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Dump pipeline parameters in a json file - // - public static void dump_parameters(workflow, params) { - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def output_pf = new File(output_d, "params_${timestamp}.json") - def jsonStr = JsonOutput.toJson(params) - output_pf.text = JsonOutput.prettyPrint(jsonStr) - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? 
'' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? 
'' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String workflow_version = NfcoreTemplate.version(workflow) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100644 index 8d030f4e..00000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,47 +0,0 @@ -// -// This file holds several Groovy 
functions that could be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - // This channel list is ordered by required channel priority. - def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] - def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean - - // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) - } - - if (channels_missing | channel_priority_violation) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/\n" + - " The observed channel order is \n" + - " ${channels}\n" + - " but the following channel order is required:\n" + - " ${required_channels_in_order}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - } -} diff --git a/lib/WorkflowDifferentialabundance.groovy b/lib/WorkflowDifferentialabundance.groovy deleted file mode 100755 index 4acad311..00000000 --- a/lib/WorkflowDifferentialabundance.groovy +++ /dev/null @@ -1,123 +0,0 @@ -// -// This file holds several functions 
specific to the workflow/differentialabundance.nf in the nf-core/differentialabundance pipeline -// - -import nextflow.Nextflow -import groovy.text.SimpleTemplateEngine - -class WorkflowDifferentialabundance { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - - genomeExistsError(params, log) - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "

$group

\n" - summary_section += "
\n" - for (param in group_params.keySet()) { - summary_section += "
$param
${group_params.get(param) ?: 'N/A'}
\n" - } - summary_section += "
\n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text - } - - // - // Generate methods description for MultiQC - // - - public static String toolCitationText(params) { - def citation_text = [ - "Tools used in the workflow included:", - params["study_type"] == 'affy_array' ? "affy (Gautier et al. 2004": "", - params["study_type"] == 'rnaseq' ? "DESeq2 (Love et al 2014)," : "", - "ggplot2 (Wickham 2016)", - "GEOQuery (Davis et al. 2007", - params["study_type"] != 'rnaseq' ? "Limma (Ritchie eta al 2015" : "", - "optparse (Davis 2018)", - "plotly (Sievert 2020)", - params["study_type"] != 'maxquant' ? "Proteus (Gierlinski 2018)" : "", - "RColorBrewer (Neuwirth 2014)", - "RMarkdown (Allaire et al. 2022)", - "shinyngs (Manning 2022)", - "SummarizedExperiment (Morgan et al. 2020)", - "." - ].join(' ').trim() - - return citation_text - } - - public static String toolBibliographyText(params) { - - // TODO Optionally add bibliographic entries to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def reference_text = [ - "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", - "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " - ].join(' ').trim() - - return reference_text - } - - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { - // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file - def meta = [:] - meta.workflow = run_workflow.toMap() - meta["manifest_map"] = run_workflow.manifest.toMap() - - // Pipeline DOI - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " - - // Tool references - meta["tool_citations"] = "" - meta["tool_bibliography"] = "" - - // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - //meta["tool_bibliography"] = toolBibliographyText(params) - - - def methods_text = mqc_methods_yaml.text - - def engine = new SimpleTemplateEngine() - def description_html = engine.createTemplate(methods_text).make(meta) - - return description_html - } - - // - // Exit pipeline if incorrect --genome key provided - // - private static void genomeExistsError(params, log) { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - Nextflow.error(error_string) - } - } -} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100755 index 48ae8ce2..00000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,61 +0,0 @@ -// -// This file holds several functions specific to the main.nf workflow in the nf-core/differentialabundance pipeline -// - -import nextflow.Nextflow - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - "* The pipeline\n" + - " https://doi.org/10.5281/zenodo.7568000\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " 
https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - // - // Validate parameters and print summary to screen - // - public static void initialise(workflow, params, log) { - - // Print workflow version and exit on --version - if (params.version) { - String workflow_version = NfcoreTemplate.version(workflow) - log.info "${workflow.manifest.name} ${workflow_version}" - System.exit(0) - } - - // Check that a -profile or Nextflow config has been provided to run the pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) - - // Check that conda channels are set-up correctly - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - Utils.checkCondaChannels(log) - } - - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") - } - } - // - // Get attribute from genome config file e.g. 
fasta - // - public static Object getGenomeAttribute(params, attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null - } -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb5..00000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index 28432388..8d350dad 100644 --- a/main.nf +++ b/main.nf @@ -13,63 +13,62 @@ nextflow.enable.dsl = 2 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf') +include { DIFFERENTIALABUNDANCE } from './workflows/differentialabundance' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_differentialabundance_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_differentialabundance_pipeline' + +include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_differentialabundance_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { validateParameters; paramsHelp } from 'plugin/nf-validation' - -// Print help message if needed -if (params.help) { - def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) - def citation = '\n' + WorkflowMain.citation(workflow) + '\n' - def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - 
log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) - System.exit(0) -} - -// Validate input parameters -if (params.validate_params) { - validateParameters() -} - -WorkflowMain.initialise(workflow, params, log) +params.gtf = getGenomeAttribute('gtf') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE + NAMED WORKFLOWS FOR PIPELINE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { DIFFERENTIALABUNDANCE } from './workflows/differentialabundance' - // -// WORKFLOW: Run main nf-core/differentialabundance analysis pipeline +// WORKFLOW: Run main analysis pipeline depending on type of input // workflow NFCORE_DIFFERENTIALABUNDANCE { DIFFERENTIALABUNDANCE () } - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 -// workflow { + + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.help, + params.validate_params, + params.monochrome_logs, + args, + params.outdir, + params.input + ) + + // + // WORKFLOW: Run main workflow + // NFCORE_DIFFERENTIALABUNDANCE () + } /* diff --git a/modules.json b/modules.json index e2f69a77..6362693e 100644 --- a/modules.json +++ b/modules.json @@ -7,27 +7,22 @@ "nf-core": { "affy/justrma": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "02a65e8871e06bac203dec16985fa3de648c99b6", "installed_by": ["modules"] }, "atlasgeneannotationmanipulation/gtf2featureannotation": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules"] - }, - 
"custom/dumpsoftwareversions": { - "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "04bc484c987b523ea5420ed6bbc1fdc6d8aef751", "installed_by": ["modules"] }, "custom/matrixfilter": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c", "installed_by": ["modules"] }, "custom/tabulartogseacls": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", "installed_by": ["modules"] }, "custom/tabulartogseagct": { @@ -37,7 +32,7 @@ }, "deseq2/differential": { "branch": "master", - "git_sha": "c992bd93b22a97feda5f04755511366c45423626", + "git_sha": "9326d73af3fbe2ee90d9ce0a737461a727c5118e", "installed_by": ["modules"] }, "geoquery/getgeo": { @@ -45,9 +40,14 @@ "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", "installed_by": ["modules"] }, + "gprofiler2/gost": { + "branch": "master", + "git_sha": "c75e76bff35e2ee5305ebe89b513637b38e79d1d", + "installed_by": ["modules"] + }, "gsea/gsea": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "7aa7ced253469386f0645133ec3e838098dbe67c", "installed_by": ["modules"] }, "gunzip": { @@ -57,12 +57,12 @@ }, "limma/differential": { "branch": "master", - "git_sha": "25047aa940979f64b31d19b94c483a9254263892", + "git_sha": "9326d73af3fbe2ee90d9ce0a737461a727c5118e", "installed_by": ["modules"] }, "proteus/readproteingroups": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "a069b29783583c219c1f23ed3dcf64a5aee1340b", "installed_by": ["modules"] }, "rmarkdownnotebook": { @@ -72,22 +72,22 @@ }, "shinyngs/app": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "85519fe9deccf2c5f7ff1f3b5d3494c61a794643", "installed_by": ["modules"] }, "shinyngs/staticdifferential": { "branch": "master", - "git_sha": 
"516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "85519fe9deccf2c5f7ff1f3b5d3494c61a794643", "installed_by": ["modules"] }, "shinyngs/staticexploratory": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "85519fe9deccf2c5f7ff1f3b5d3494c61a794643", "installed_by": ["modules"] }, "shinyngs/validatefomcomponents": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "85519fe9deccf2c5f7ff1f3b5d3494c61a794643", "installed_by": ["modules"] }, "untar": { @@ -103,7 +103,23 @@ } }, "subworkflows": { - "nf-core": {} + "nf-core": { + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + } + } } } } diff --git a/modules/local/filter_difftable.nf b/modules/local/filter_difftable.nf new file mode 100644 index 00000000..da9612e3 --- /dev/null +++ b/modules/local/filter_difftable.nf @@ -0,0 +1,52 @@ +process FILTER_DIFFTABLE { + + label 'process_single' + + conda "pandas=1.5.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pandas:1.5.2' : + 'biocontainers/pandas:1.5.2' }" + + input: + tuple val(meta), path(input_file) + tuple val(logFC_column), val(FC_threshold) + tuple val(padj_column), val(padj_threshold) + + output: + tuple val(meta), path("*_filtered.tsv") , emit: filtered + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + #!/usr/bin/env python + + from math import log2 + from os import path + import pandas as pd + import platform + from sys import exit + + # 1. Check that the current logFC/padj is not NA + # 2. Check that the current logFC is >= threshold (abs does not work, so use a workaround) + # 3. Check that the current padj is <= threshold + # If this is true, the row is written to the new file, otherwise not + if not any("$input_file".endswith(ext) for ext in [".csv", ".tsv", ".txt"]): + exit("Please provide a .csv, .tsv or .txt file!") + + table = pd.read_csv("$input_file", sep=("," if "$input_file".endswith(".csv") else "\t"), header=0) + logFC_threshold = log2(float("$FC_threshold")) + table = table[~table["$logFC_column"].isna() & + ~table["$padj_column"].isna() & + (pd.to_numeric(table["$logFC_column"], errors='coerce').abs() >= float(logFC_threshold)) & + (pd.to_numeric(table["$padj_column"], errors='coerce') <= float("$padj_threshold"))] + + table.to_csv(path.splitext(path.basename("$input_file"))[0]+"_filtered.tsv", sep="\t", index=False) + + with open('versions.yml', 'a') as version_file: + version_file.write('"${task.process}":' + "\\n") + version_file.write(" pandas: " + str(pd.__version__) + "\\n") + """ +} diff --git a/modules/nf-core/affy/justrma/environment.yml b/modules/nf-core/affy/justrma/environment.yml index 587ede1e..b0afd895 100644 --- a/modules/nf-core/affy/justrma/environment.yml +++ b/modules/nf-core/affy/justrma/environment.yml @@ -1,3 +1,4 @@ +name: affy_justrma channels: - conda-forge - bioconda diff --git 
a/modules/nf-core/affy/justrma/main.nf b/modules/nf-core/affy/justrma/main.nf index 6dcfdf29..62c71d24 100644 --- a/modules/nf-core/affy/justrma/main.nf +++ b/modules/nf-core/affy/justrma/main.nf @@ -2,7 +2,7 @@ process AFFY_JUSTRMA { tag "$meta.id" label 'process_single' - conda 'modules/nf-core/affy/justrma/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bioconductor-affy:1.78.0--r43ha9d7317_1': 'biocontainers/bioconductor-affy:1.78.0--r43ha9d7317_1' }" diff --git a/modules/nf-core/affy/justrma/meta.yml b/modules/nf-core/affy/justrma/meta.yml index 8b1685fc..cd9b6d6c 100644 --- a/modules/nf-core/affy/justrma/meta.yml +++ b/modules/nf-core/affy/justrma/meta.yml @@ -12,7 +12,7 @@ tools: documentation: "https://www.bioconductor.org/packages/release/bioc/html/affy.html" tool_dev_url: "https://github.com/Bioconductor/affy" doi: "10.1093/bioinformatics/btg405" - licence: "['LGPL >=2.0']" + licence: ["LGPL >=2.0"] input: - meta: type: map diff --git a/modules/nf-core/affy/justrma/templates/affy_justrma.R b/modules/nf-core/affy/justrma/templates/affy_justrma.R index c632b706..e84b532d 100755 --- a/modules/nf-core/affy/justrma/templates/affy_justrma.R +++ b/modules/nf-core/affy/justrma/templates/affy_justrma.R @@ -69,7 +69,7 @@ install_cdf_db <- function(celfile, annotation = FALSE){ if (annotation){ exts <- c(exts, '.db') } - + options(timeout=600) for (package in paste0(cleaned.cdfName, exts)){ install.packages( package, diff --git a/modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/environment.yml b/modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/environment.yml index 67ae588e..3a613e61 100644 --- a/modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/environment.yml +++ 
b/modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/environment.yml @@ -1,6 +1,7 @@ +name: atlasgeneannotationmanipulation_gtf2featureannotation channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::atlas-gene-annotation-manipulation=1.1.0 + - bioconda::atlas-gene-annotation-manipulation=1.1.1 diff --git a/modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/main.nf b/modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/main.nf index c71e0f7f..12208f81 100644 --- a/modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/main.nf +++ b/modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/main.nf @@ -2,10 +2,10 @@ process ATLASGENEANNOTATIONMANIPULATION_GTF2FEATUREANNOTATION { tag "${meta.id}" label 'process_single' - conda 'modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/atlas-gene-annotation-manipulation%3A1.1.0--hdfd78af_0': - 'biocontainers/atlas-gene-annotation-manipulation:1.1.0--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/atlas-gene-annotation-manipulation%3A1.1.1--hdfd78af_0': + 'biocontainers/atlas-gene-annotation-manipulation:1.1.1--hdfd78af_0' }" input: tuple val(meta), path(gtf) @@ -23,6 +23,7 @@ process ATLASGENEANNOTATIONMANIPULATION_GTF2FEATUREANNOTATION { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: meta.id def reference_cdna = fasta ? "--parse-cdnas $fasta" : "" + def VERSION = '1.1.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
""" gtf2featureAnnotation.R \\ @@ -33,8 +34,22 @@ process ATLASGENEANNOTATIONMANIPULATION_GTF2FEATUREANNOTATION { cat <<-END_VERSIONS > versions.yml "${task.process}": - r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') - atlas-gene-annotation-manipulation: 1.1.0 + atlas-gene-annotation-manipulation: ${VERSION} END_VERSIONS """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: meta.id + def reference_cdna = fasta ? "--parse-cdnas $fasta" : "" + def VERSION = '1.1.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${meta.id}.anno.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + atlas-gene-annotation-manipulation: ${VERSION} + END_VERSIONS + """ + } diff --git a/modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/meta.yml b/modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/meta.yml index 10aa5366..3e34244e 100644 --- a/modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/meta.yml +++ b/modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/meta.yml @@ -2,6 +2,7 @@ name: "atlasgeneannotationmanipulation_gtf2featureannotation" description: Generate tables of feature metadata from GTF files keywords: - gtf + - gene - annotation tools: - "atlasgeneannotationmanipulation": @@ -9,7 +10,7 @@ tools: homepage: "https://github.com/ebi-gene-expression-group/atlas-gene-annotation-manipulation" documentation: "https://github.com/ebi-gene-expression-group/atlas-gene-annotation-manipulation" tool_dev_url: "https://github.com/ebi-gene-expression-group/atlas-gene-annotation-manipulation" - licence: "['Apache-2.0']" + licence: ["Apache-2.0"] input: - meta: type: map diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml deleted file mode 100644 index 7ca22161..00000000 --- 
a/modules/nf-core/custom/dumpsoftwareversions/environment.yml +++ /dev/null @@ -1,6 +0,0 @@ -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::multiqc=1.15 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf deleted file mode 100644 index 60a19e0e..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_single' - - // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda 'modules/nf-core/custom/dumpsoftwareversions/environment.yml' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : - 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" - - input: - path versions - - output: - path "software_versions.yml" , emit: yml - path "software_versions_mqc.yml", emit: mqc_yml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - template 'dumpsoftwareversions.py' -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml deleted file mode 100644 index 9414c32d..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ /dev/null @@ -1,37 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: custom_dumpsoftwareversions -description: Custom module used to dump software versions within the nf-core pipeline template -keywords: - - custom - - dump - - version -tools: - - custom: - description: Custom module used to dump software versions within the nf-core pipeline template - homepage: https://github.com/nf-core/tools - documentation: https://github.com/nf-core/tools - licence: ["MIT"] 
-input: - - versions: - type: file - description: YML file containing software versions - pattern: "*.yml" -output: - - yml: - type: file - description: Standard YML file containing software versions - pattern: "software_versions.yml" - - mqc_yml: - type: file - description: MultiQC custom content YML file containing software versions - pattern: "software_versions_mqc.yml" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@grst" -maintainers: - - "@drpatelh" - - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py deleted file mode 100755 index da033408..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python - - -"""Provide functions to merge multiple versions.yml files.""" - - -import yaml -import platform -from textwrap import dedent - - -def _make_versions_html(versions): - """Generate a tabular HTML output of all versions for MultiQC.""" - html = [ - dedent( - """\\ - - - - - - - - - - """ - ) - ] - for process, tmp_versions in sorted(versions.items()): - html.append("") - for i, (tool, version) in enumerate(sorted(tmp_versions.items())): - html.append( - dedent( - f"""\\ - - - - - - """ - ) - ) - html.append("") - html.append("
    Process Name Software Version
    {process if (i == 0) else ''}{tool}{version}
    ") - return "\\n".join(html) - - -def main(): - """Load all version files and generate merged output.""" - versions_this_module = {} - versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, - } - - with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - - # aggregate versions by the module name (derived from fully-qualified process name) - versions_by_module = {} - for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - - versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", - } - - versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), - } - - with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) - with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - - with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) - - -if __name__ == "__main__": - main() diff --git a/modules/nf-core/custom/matrixfilter/environment.yml b/modules/nf-core/custom/matrixfilter/environment.yml index 383c136b..322c673e 100644 --- a/modules/nf-core/custom/matrixfilter/environment.yml +++ 
b/modules/nf-core/custom/matrixfilter/environment.yml @@ -1,3 +1,4 @@ +name: custom_matrixfilter channels: - conda-forge - bioconda diff --git a/modules/nf-core/custom/matrixfilter/main.nf b/modules/nf-core/custom/matrixfilter/main.nf index db603c43..c31a45be 100644 --- a/modules/nf-core/custom/matrixfilter/main.nf +++ b/modules/nf-core/custom/matrixfilter/main.nf @@ -1,7 +1,7 @@ process CUSTOM_MATRIXFILTER { tag "$meta" label 'process_single' - conda 'modules/nf-core/custom/matrixfilter/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/r-base:4.2.1' : 'biocontainers/r-base:4.2.1' }" diff --git a/modules/nf-core/custom/matrixfilter/templates/matrixfilter.R b/modules/nf-core/custom/matrixfilter/templates/matrixfilter.R index 0bfff8cc..331f1a06 100644 --- a/modules/nf-core/custom/matrixfilter/templates/matrixfilter.R +++ b/modules/nf-core/custom/matrixfilter/templates/matrixfilter.R @@ -166,7 +166,7 @@ if (is.null(opt\$minimum_samples_not_na)) { # Define the tests tests <- list( - 'abundance' = function(x) sum(x > opt\$minimum_abundance, na.rm = T) >= opt\$minimum_samples, # check if rows have sufficiently high abundance + 'abundance' = function(x) sum(x >= opt\$minimum_abundance, na.rm = T) >= opt\$minimum_samples, # check if rows have sufficiently high abundance 'na' = function(x) !any(is.na(x)) || sum(!is.na(x)) >= opt\$minimum_samples_not_na # check if enough values in row are not NA ) diff --git a/modules/nf-core/custom/tabulartogseacls/environment.yml b/modules/nf-core/custom/tabulartogseacls/environment.yml index d9675fce..96c3276d 100644 --- a/modules/nf-core/custom/tabulartogseacls/environment.yml +++ b/modules/nf-core/custom/tabulartogseacls/environment.yml @@ -1,3 +1,4 @@ +name: custom_tabulartogseacls channels: - conda-forge - bioconda diff --git a/modules/nf-core/custom/tabulartogseacls/main.nf 
b/modules/nf-core/custom/tabulartogseacls/main.nf index 58fb5d08..5e6fd37a 100644 --- a/modules/nf-core/custom/tabulartogseacls/main.nf +++ b/modules/nf-core/custom/tabulartogseacls/main.nf @@ -2,7 +2,7 @@ process CUSTOM_TABULARTOGSEACLS { tag "$meta.id" label 'process_single' - conda 'modules/nf-core/custom/tabulartogseacls/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'nf-core/ubuntu:20.04' }" @@ -27,8 +27,8 @@ process CUSTOM_TABULARTOGSEACLS { """ cls_file=${prefix}.cls - column_number=\$(cat $samples | head -n 1 | tr '$separator' "\\n" | grep -En "^$variable" | awk -F':' '{print \$1}') - classes=\$(tail -n +2 $samples | awk -F'$separator' '{print \$'\$column_number'}') + column_number=\$(cat $samples | head -n 1 | tr '$separator' "\\n" | grep -En "^$variable\$" | awk -F':' '{print \$1}') + classes=\$(tail -n +2 $samples | awk -F'$separator' '{print \$'\$column_number'}' | sed 's/^\$/empty/g') unique_classes=\$(echo -e "\$classes" | awk '!x[\$0]++') echo -e "\$(echo -e \"\$classes\" | wc -l) \$(echo -e \"\$unique_classes\" | wc -l) 1" > \$cls_file diff --git a/modules/nf-core/custom/tabulartogseagct/environment.yml b/modules/nf-core/custom/tabulartogseagct/environment.yml index d9675fce..7d4486f4 100644 --- a/modules/nf-core/custom/tabulartogseagct/environment.yml +++ b/modules/nf-core/custom/tabulartogseagct/environment.yml @@ -3,4 +3,4 @@ channels: - bioconda - defaults dependencies: - - conda-forge::coreutils=9.1 + - bioconda::multiqc=1.21 diff --git a/modules/nf-core/deseq2/differential/main.nf b/modules/nf-core/deseq2/differential/main.nf index 3c797f7b..ab7bc06a 100644 --- a/modules/nf-core/deseq2/differential/main.nf +++ b/modules/nf-core/deseq2/differential/main.nf @@ -1,6 +1,6 @@ process DESEQ2_DIFFERENTIAL { tag "$meta" - label 'process_medium' + label 'process_single' conda 
"${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/nf-core/gprofiler2/gost/environment.yml b/modules/nf-core/gprofiler2/gost/environment.yml new file mode 100644 index 00000000..65e5d8f2 --- /dev/null +++ b/modules/nf-core/gprofiler2/gost/environment.yml @@ -0,0 +1,8 @@ +name: gprofiler2_gost +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::r-ggplot2=3.4.3 + - conda-forge::r-gprofiler2=0.2.2 diff --git a/modules/nf-core/gprofiler2/gost/main.nf b/modules/nf-core/gprofiler2/gost/main.nf new file mode 100644 index 00000000..acb18b93 --- /dev/null +++ b/modules/nf-core/gprofiler2/gost/main.nf @@ -0,0 +1,31 @@ +process GPROFILER2_GOST { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-3712554873398d849d0d11b22440f41febbc4ede:aa19bb8afc0ec6456a4f3cd650f7577c3bbdd4f3-0': + 'biocontainers/mulled-v2-3712554873398d849d0d11b22440f41febbc4ede:aa19bb8afc0ec6456a4f3cd650f7577c3bbdd4f3-0' }" + + input: + tuple val(meta), path(de_file) + path(gmt_file) + path(background_file) + + output: + tuple val(meta), path("*.gprofiler2.all_enriched_pathways.tsv") , emit: all_enrich + tuple val(meta), path("*.gprofiler2.gost_results.rds") , emit: rds , optional: true + tuple val(meta), path("*.gprofiler2.gostplot.png") , emit: plot_png , optional: true + tuple val(meta), path("*.gprofiler2.gostplot.html") , emit: plot_html , optional: true + tuple val(meta), path("*.gprofiler2.*.sub_enriched_pathways.tsv") , emit: sub_enrich , optional: true + tuple val(meta), path("*.gprofiler2.*.sub_enriched_pathways.png") , emit: sub_plot , optional: true + tuple val(meta), path("*ENSG_filtered.gmt") , emit: filtered_gmt, optional: true + tuple val(meta), 
path("*R_sessionInfo.log") , emit: session_info + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + template 'gprofiler2_gost.R' +} diff --git a/modules/nf-core/gprofiler2/gost/meta.yml b/modules/nf-core/gprofiler2/gost/meta.yml new file mode 100644 index 00000000..a2789f27 --- /dev/null +++ b/modules/nf-core/gprofiler2/gost/meta.yml @@ -0,0 +1,102 @@ +name: "gprofiler2_gost" +description: runs a functional enrichment analysis with gprofiler2 +keywords: + - gene set analysis + - enrichment + - gprofiler2 + - gost + - gene set +tools: + - "gprofiler2": + description: "An R interface corresponding to the 2019 update of g:Profiler web tool." + homepage: "https://biit.cs.ut.ee/gprofiler/page/r" + documentation: "https://rdrr.io/cran/gprofiler2/" + tool_dev_url: "https://gl.cs.ut.ee/biit/r-gprofiler2" + doi: "10.1093/nar/gkad347" + licence: ["GPL v2"] + +input: + - meta: + type: map + description: | + Groovy Map containing contrast information, e.g. [ variable:'treatment', reference:'treated', control:'saline', blocking:'' ] + - de_file: + type: file + pattern: "*.{csv,tsv}" + description: | + CSV or TSV-format tabular file with differential analysis outputs + - contrast_variable: + type: string + description: | + The contrast variable that is being investigated in DE analysis, e.g. "treatment". + - reference: + type: string + description: | + The contrast level of the reference samples, e.g. "control" + - target: + type: string + description: | + The contrast level of the target samples, e.g. "treated" + - background_file: + type: file + pattern: "*.{csv,tsv,txt}" + description: | + Path to a CSV/TSV/TXT file listing gene IDs that should be used as the background (will override count_file). This can be an expression matrix (see also background_column parameter); if so, will only consider those genes with an expression value > 0 in at least one sample. 
Alternatively, this can be a TXT file containing only a list of gene IDs. + - gmt_file: + type: file + pattern: "*.gmt" + description: | + Path to a GMT file downloaded from g:profiler that should be queried instead of the online databases + +output: + - meta: + type: map + description: | + Groovy Map containing contrast information, e.g. [ variable:'treatment', reference:'treated', control:'saline', blocking:'' ] + - all_enrich: + type: file + description: | + TSV file; table listing all enriched pathways that were found. This table will always be created (empty if no enrichment was found), the other output files are only created if enriched pathways were found + pattern: "*gprofiler2.*all_enriched_pathways.tsv" + - rds: + type: file + description: | + RDS file; R object containing the results of the gost query + pattern: "*gprofiler2.*gost_results.rds" + - plot_png: + type: file + description: | + PNG file; Manhattan plot of all enriched pathways + pattern: "*gprofiler2.*gostplot.png" + - plot_html: + type: file + description: | + HTML file; interactive Manhattan plot of all enriched pathways + pattern: "*gprofiler2.*gostplot.html" + - sub_enrich: + type: file + description: | + TSV file; table listing enriched pathways that were found from one particular source + pattern: "*gprofiler2.*sub_enriched_pathways.tsv" + - sub_plot: + type: file + description: | + PNG file; bar plot showing the fraction of genes that were found enriched in each pathway + pattern: "*gprofiler2.*sub_enriched_pathways.png" + - filtered_gmt: + type: file + description: | + GMT file that was provided as input or that was downloaded from g:profiler if no input GMT file was given; filtered for the selected datasources + pattern: "*ENSG_filtered.gmt" + - session_info: + type: file + description: | + Log file containing information about the R session that was run for this module + pattern: "*R_sessionInfo.log" + - versions: + type: file + description: File containing software versions + 
pattern: "versions.yml" + +authors: + - "@WackerO" diff --git a/modules/nf-core/gprofiler2/gost/templates/gprofiler2_gost.R b/modules/nf-core/gprofiler2/gost/templates/gprofiler2_gost.R new file mode 100644 index 00000000..1de2a754 --- /dev/null +++ b/modules/nf-core/gprofiler2/gost/templates/gprofiler2_gost.R @@ -0,0 +1,462 @@ +#!/usr/bin/env Rscript + +# Written by Oskar Wacker (https://github.com/WackerO) in +# collaboration with Gisela Gabernet (https://github.com/ggabernet) +# Script template by Jonathan Manning (https://github.com/pinin4fjords) + +# MIT License + +# Copyright (c) QBiC + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +################################################ +################################################ +## Functions ## +################################################ +################################################ + +#' Parse out options from a string without recourse to optparse +#' +#' @param x Long-form argument list like --opt1 val1 --opt2 val2 +#' +#' @return named list of options and values similar to optparse (options without a value are dropped) + +parse_args <- function(x) { + args_list <- unlist(strsplit(x, ' ?--')[[1]])[-1] + args_vals <- lapply(args_list, function(x) scan(text=x, what='character', quiet = TRUE)) + + # Ensure the option vectors are length 2 (key/ value) to catch empty ones + args_vals <- lapply(args_vals, function(z) { length(z) <- 2; z}) + + parsed_args <- structure(lapply(args_vals, function(x) x[2]), names = lapply(args_vals, function(x) x[1])) + parsed_args[! is.na(parsed_args)] +} + +#' Flexibly read CSV, TSV or TXT files; the separator is chosen from the file extension (.tsv/.txt: tab, .csv: comma) +#' +#' @param file Input file +#' @param header Passed to read.delim() +#' @param row.names Passed to read.delim() +#' +#' @return output Data frame; stops with an error for any other file extension + +read_delim_flexible <- function(file, header = TRUE, row.names = NULL, check.names = F) { + + ext <- tolower(tail(strsplit(basename(file), split = "\\\\.")[[1]], 1)) + + if (ext == "tsv" || ext == "txt") { + separator <- "\\t" + } else if (ext == "csv") { + separator <- "," + } else { + stop(paste("Unknown separator for", ext)) + } + + read.delim( + file, + sep = separator, + header = header, + row.names = row.names, + check.names = check.names + ) +} + +#' Round numeric dataframe columns to fixed decimal places with round() +#' and convert the rounded columns back to numerics +#' +#' @param df A data frame +#' @param columns Which columns to round (all numeric columns by default) +#' @param digits How many decimal places to round to? 
If -1, will return the unchanged input df +#' +#' @return output Data frame +round_dataframe_columns <- function(df, columns = NULL, digits = -1) { + if (digits == -1) { + return(df) # if -1, return df without rounding + } + + df <- data.frame(df, check.names = FALSE) # make data.frame from vector as otherwise, the format will get messed up + if (is.null(columns)) { + columns <- colnames(df)[(unlist(lapply(df, is.numeric), use.names=F))] # extract only numeric columns for rounding + } + + df[,columns] <- round( + data.frame(df[, columns], check.names = FALSE), + digits = digits + ) + + # Convert columns back to numeric + + for (c in columns) { + df[[c]][grep("^ *NA\$", df[[c]])] <- NA + df[[c]] <- as.numeric(df[[c]]) + } + df +} + +################################################ +################################################ +## PARSE PARAMETERS FROM NEXTFLOW ## +################################################ +################################################ + +# I've defined these in a single array like this so that we could go back to an +# optparse-driven method in future with module bin/ directories, rather than +# the template + +# Set defaults and classes +opt <- list( + de_file = '$de_file', + de_id_column = 'gene_id', + organism = NULL, + sources = NULL, + output_prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix'), + significant = T, + measure_underrepresentation = F, + correction_method = 'gSCS', + evcodes = F, + pval_threshold = 0.05, + gmt_file = '$gmt_file', + token = NULL, + background_file = '$background_file', + background_column = NULL, + domain_scope = 'annotated', + min_diff = 1, + round_digits = -1, + palette_name = 'Blues' +) + +opt_types <- lapply(opt, class) + +# Apply parameter overrides + +args_opt <- parse_args('$task.ext.args') +for ( ao in names(args_opt)) { + if (! ao %in% names(opt)) { + stop(paste("Invalid option:", ao)) + } else { + + # Preserve classes from defaults where possible + if (! 
is.null(opt[[ao]])) { + args_opt[[ao]] <- as(args_opt[[ao]], opt_types[[ao]]) + } + opt[[ao]] <- args_opt[[ao]] + } +} +# Check if required parameters have been provided +required_opts <- c('output_prefix') +missing <- required_opts[unlist(lapply(opt[required_opts], is.null)) | ! required_opts %in% names(opt)] + +if (length(missing) > 0) { + stop(paste("Missing required options:", paste(missing, collapse=', '))) +} +if (is.null(opt\$organism) && opt\$gmt_file == "" && is.null(opt\$token)) { + stop('Please provide organism, gmt_file or token.') +} + +# Check file inputs are valid + +for (file_input in c('de_file')) { + if (is.null(opt[[file_input]])) { + stop(paste("Please provide", file_input), call. = FALSE) + } + + if (! file.exists(opt[[file_input]])) { + stop(paste0('Value of ', file_input, ': ', opt[[file_input]], ' is not a valid file')) + } +} + +################################################ +################################################ +## Finish loading libraries ## +################################################ +################################################ + +library(gprofiler2) +library(ggplot2) + +################################################ +################################################ +## READ IN DIFFERENTIAL GENES FILE ## +################################################ +################################################ + +de.genes <- + read_delim_flexible( + file = opt\$de_file + ) + +output_prefix <- paste0(opt\$output_prefix, ".gprofiler2") + +# Create empty output table as it is a mandatory output +file.create(paste(output_prefix, 'all_enriched_pathways', 'tsv', sep = '.')) + +if (nrow(de.genes) > 0) { + + query <- de.genes[[opt\$de_id_column]] + + ################################################ + ################################################ + # Run gprofiler processes and generate outputs # + ################################################ + ################################################ + + set.seed(1) # This will 
ensure that reruns have the same plot colors + + sources <- opt\$sources + if (!is.null(sources)) { + sources <- strsplit(opt\$sources, split = ",")[[1]] + } + if (!is.null(sources)) { + sources <- strsplit(opt\$sources, split = ",")[[1]] + } + + if (!is.null(opt\$token)) { + + # First check if a token was provided + token <- opt\$token + + } else if (!is.null(opt\$organism)) { + + # Next, check if organism was provided. Get the GMT file from gprofiler and save both the full file as well as the filtered one to metadata + gmt_url <- paste0("https://biit.cs.ut.ee/gprofiler//static/gprofiler_full_", opt\$organism, ".ENSG.gmt") + tryCatch( + { + gmt_path <- paste0("gprofiler_full_", opt\$organism, ".ENSG.gmt") + if (!is.null(sources)) { + gmt_path <- paste0("gprofiler_full_", opt\$organism, ".", paste(sources, collapse="_"), ".ENSG_filtered.gmt") + } + download <- download.file(gmt_url, gmt_path) + if (download != 0) { + print("Failed to fetch the GMT file from gprofiler with this URL:") + print(gmt_url) + print("For reproducibility reasons, try to download the GMT file manually by visiting https://biit.cs.ut.ee/gprofiler/gost, then selecting the correct organism and, in datasources, clicking 'combined ENSG.gmt'.") + } else { + if (!is.null(sources)) { + gmt <- Filter(function(line) any(startsWith(line, sources)), readLines(gmt_path)) + print(paste0("GMT file successfully downloaded and filtered. 
Please note that for some sources, the GMT file may not contain any entries as these cannot be retrieved from gprofiler; in this case, the GMT file may be completely empty.")) + writeLines(gmt, gmt_path) + } + } + }, + error=function(gost_error) { + print("Failed to fetch the GMT file from gprofiler with this URL:") + print(gmt_url) + print("Got error:") + print(gost_error) + print("For reproducibility reasons, please try to download the GMT file manually by visiting https://biit.cs.ut.ee/gprofiler/gost, then selecting the correct organism and, in datasources, clicking 'combined ENSG.gmt'. Then provide it to the pipeline with the parameter `--gmt_file`") + } + ) + token <- opt\$organism + + } else { + + # Last option: Use custom GMT file + gmt_path <- opt\$gmt_file + + # If sources are set, extract only requested entries from the custom GMT file (gprofiler will NOT filter automatically!) + if (!is.null(sources)) { + gmt <- Filter(function(line) any(startsWith(line, sources)), readLines(opt\$gmt_file)) + gmt_path <- paste0(strsplit(basename(opt\$gmt_file), split = "\\\\.")[[1]][[1]], ".", paste(sources, collapse="_"), "_filtered.gmt") + writeLines(gmt, gmt_path) + } + token <- upload_GMT_file(gmt_path) + + # Add gost ID to output GMT name so that it can be reused in future runs + file.rename(gmt_path, paste0(strsplit(basename(opt\$gmt_file), split = "\\\\.")[[1]][[1]], ".", paste(sources, collapse="_"), "_gostID_", token, "_filtered.gmt")) + + } + + + # If custom background_file was provided, read it + if (opt\$background_file != "") { + intensities_table <- read_delim_flexible( + file = opt\$background_file + ) + # If only 1 col, it is a list, not a matrix + if (ncol(intensities_table) == 1) { + background <- intensities_table[,1] # Extract first column from df + background <- append(background, colnames(intensities_table)[1]) # First entry was put into header, add it to vector + } else { + # Otherwise it's a matrix + # Set rownames to background_column if param was set + if 
(!is.null(opt\$background_column)) { + if (opt\$background_column %in% colnames(intensities_table)) { + rownames(intensities_table) <- intensities_table[[opt\$background_column]] + intensities_table[[opt\$background_column]] <- NULL + } else { + stop(paste0("Invalid background_column argument: ", opt\$background_column, + ". Valid columns are: ", paste(colnames(intensities_table), collapse=", "), ".")) + } + } else { + + # Otherwise set rownames to first column + rownames(intensities_table) <- intensities_table[,1] + intensities_table <- intensities_table[,-1] + } + + # Rownames are set, now remove non-numeric columns + nums <- unlist(lapply(intensities_table, is.numeric), use.names = FALSE) + intensities_table <- intensities_table[, nums] + # Keep only rownames which have abundance + background <- rownames(subset(intensities_table, rowSums(intensities_table, na.rm = TRUE)>0)) + } + } else { + background <- NULL + } + + # Name the query as it will otherwise be called 'query_1' which will also determine the gostplot title + q <- list(query) + names(q) <- c(output_prefix) + gost_results <- gost( + query=q, + organism=token, + significant=opt\$significant, + measure_underrepresentation=opt\$measure_underrepresentation, + correction_method=opt\$correction_method, + sources=sources, + evcodes=opt\$evcodes, + user_threshold=opt\$pval_threshold, + custom_bg=background, + domain_scope=opt\$domain_scope + ) + + if (!is.null(gost_results)) { + # Create interactive plot and save to HTML + interactive_plot <- gostplot(gost_results, capped=T, interactive=T) + + # Save interactive plot as HTML + htmlwidgets::saveWidget( + widget = interactive_plot, + file = paste(output_prefix, 'gostplot', 'html', sep = '.') + ) + + # Create a static plot and save to PNG + static_plot <- gostplot(gost_results, capped=T, interactive=F) + ggsave(plot = static_plot, filename = paste(output_prefix, 'gostplot', 'png', sep = '.'), width = 10, height = 7) + + # Subset gost results to those pathways 
with a min. number of differential features + gost_results\$result <- gost_results\$result[which(gost_results\$result\$intersection_size>=opt\$min_diff),] + + # annotate query size (number of differential features in contrast) + gost_results\$result\$original_query_size <- rep(length(as.character(de.genes\$Ensembl_ID)), nrow(gost_results\$result)) + + # R object for other processes to use + + saveRDS(gost_results, file = paste(output_prefix, 'gost_results.rds', sep = '.')) + + # Write full enrichment table (except parents column as that one throws an error) + + gost_results\$results <- data.frame( + round_dataframe_columns(gost_results\$result[,-which(names(gost_results\$result) == "parents")], digits=opt\$round_digits), + check.names = FALSE + ) + + write.table( + gost_results\$results, + file = paste(output_prefix, 'all_enriched_pathways', 'tsv', sep = '.'), + col.names = TRUE, + row.names = FALSE, + sep = '\t', + quote = FALSE + ) + + # Iterate over the enrichment results by source and save separate tables + for (df in split(gost_results\$result, gost_results\$result\$source)){ + + db_source <- df\$source[1] + df_subset <- data.frame( + Pathway_name = df\$term_name, + Pathway_code = df\$term_id, + DE_genes = df\$intersection_size, + Pathway_size = df\$term_size, + Fraction_DE = df\$recall, + Padj = df\$p_value, + DE_genes_names = df\$intersection + ) + df_subset <- data.frame( + round_dataframe_columns(df_subset, digits=opt\$round_digits), + check.names = FALSE + ) + write.table( + df_subset, + file = paste(output_prefix, db_source, 'sub_enriched_pathways', 'tsv', sep = '.'), + col.names = TRUE, + row.names = FALSE, + sep = '\t', + quote = FALSE + ) + + # For plot, shorten pathway names as they can get quite long (full name can be looked up in the table) + df_subset\$Pathway_name <- sapply(df_subset\$Pathway_name, substr, start=1, stop=50) + + # Extract 3 colors from the chosen palette (2 are sufficient, but brewer.pal has a minimum of 3); first and last will be 
used for plot + colors <- RColorBrewer::brewer.pal(3, opt\$palette_name) + + # Enriched pathways horizontal barplots of padj values + p <- ggplot(df_subset, aes(x=reorder(Pathway_name, Fraction_DE), y=Fraction_DE)) + + geom_bar(aes(fill=Padj), stat="identity", width = 0.7) + + geom_text(aes(label=paste0(df_subset\$DE_genes, "/", df_subset\$Pathway_size)), vjust=0.4, hjust=-0.2, size=3) + + theme(plot.title.position = "plot") + + coord_flip() + + scale_y_continuous(limits = c(0.00, 1.24), breaks = seq(0, 1.24, by = 0.25)) + + ggtitle(paste("Enriched", db_source, "pathways")) + + xlab("") + ylab("Enriched fraction (DE features / Pathway size)") + + scale_fill_continuous(high = colors[1], low = colors[3]) + + # Save plot with set width to ensure there is enough space for the labels; adapt height to nrow but limit it to 100 as there will be an error for too high values + ggsave(p, filename = paste(output_prefix, db_source, 'sub_enriched_pathways', 'png', sep = '.'), device = "png", width=10, height=min(100, 1.5+nrow(df_subset)*0.15), limitsize=F) + } + } +} else { + print("No differential features found, pathway enrichment analysis with gprofiler2 will be skipped.") +} + +################################################ +################################################ +## R SESSION INFO ## +################################################ +################################################ + +sink("R_sessionInfo.log") +print(sessionInfo()) +sink() + +################################################ +################################################ +## VERSIONS FILE ## +################################################ +################################################ + +r.version <- strsplit(version[['version.string']], ' ')[[1]][3] +gprofiler2.version <- as.character(packageVersion('gprofiler2')) +ggplot2.version <- as.character(packageVersion('ggplot2')) +writeLines( + c( + '"\${task.process}":', + paste(' r-base:', r.version), + paste(' r-ggplot2:', ggplot2.version), 
+ paste(' r-gprofiler2:', gprofiler2.version) + ), +'versions.yml') + +################################################ +################################################ +################################################ +################################################ diff --git a/modules/nf-core/gsea/gsea/environment.yml b/modules/nf-core/gsea/gsea/environment.yml index 3158f736..cb3c41e7 100644 --- a/modules/nf-core/gsea/gsea/environment.yml +++ b/modules/nf-core/gsea/gsea/environment.yml @@ -1,3 +1,4 @@ +name: gsea_gsea channels: - conda-forge - bioconda diff --git a/modules/nf-core/gsea/gsea/main.nf b/modules/nf-core/gsea/gsea/main.nf index 17f4a2e8..6a060c46 100644 --- a/modules/nf-core/gsea/gsea/main.nf +++ b/modules/nf-core/gsea/gsea/main.nf @@ -2,7 +2,7 @@ process GSEA_GSEA { tag "$meta.id" label 'process_single' - conda 'modules/nf-core/gsea/gsea/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gsea:4.3.2--hdfd78af_0': 'biocontainers/gsea:4.3.2--hdfd78af_0' }" @@ -22,11 +22,11 @@ process GSEA_GSEA { tuple val(meta), path("*gsea_report_for_${target}.html") , emit: report_htmls_target tuple val(meta), path("*ranked_gene_list*.tsv") , emit: ranked_gene_list tuple val(meta), path("*gene_set_sizes.tsv") , emit: gene_set_sizes - tuple val(meta), path("*butterfly_plot.png") , emit: butterfly_plot tuple val(meta), path("*global_es_histogram.png") , emit: histogram tuple val(meta), path("*heat_map_1.png") , emit: heatmap tuple val(meta), path("*pvalues_vs_nes_plot.png") , emit: pvalues_vs_nes_plot tuple val(meta), path("*ranked_list_corr_2.png") , emit: ranked_list_corr + tuple val(meta), path("*butterfly_plot.png") , emit: butterfly_plot, optional: true tuple val(meta), path("*[!gene_set_size|gsea_report|ranked_gene_list]*.tsv"), emit: gene_set_tsv, optional: true tuple val(meta), path("*[!gsea_report|heat_map_corr_plot|index|pos_snapshot|neg_snapshot]*.html"), emit: gene_set_html, optional: true tuple val(meta), path("*[!butterfly|enplot|global_es_histogram|gset_rnd_es_dist|heat_map|pvalues_vs_nes_plot|ranked_list_corr]*.png"), emit: gene_set_heatmap, optional: true diff --git a/modules/nf-core/gsea/gsea/meta.yml b/modules/nf-core/gsea/gsea/meta.yml index 0ff81518..fe2d4c95 100644 --- a/modules/nf-core/gsea/gsea/meta.yml +++ b/modules/nf-core/gsea/gsea/meta.yml @@ -11,7 +11,7 @@ tools: homepage: "http://www.gsea-msigdb.org/gsea/index.jsp" documentation: "https://software.broadinstitute.org/cancer/software/gsea/wiki/index.php/Main_Page" doi: "10.1073/pnas.0506580102" - licence: "['BSD-3-clause']" + licence: ["BSD-3-clause"] input: - meta: type: map diff --git a/modules/nf-core/limma/differential/main.nf b/modules/nf-core/limma/differential/main.nf index 56e02950..384e4649 100644 --- a/modules/nf-core/limma/differential/main.nf +++ b/modules/nf-core/limma/differential/main.nf @@ -1,6 +1,6 @@ process LIMMA_DIFFERENTIAL { 
tag "$meta" - label 'process_medium' + label 'process_single' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/nf-core/proteus/readproteingroups/environment.yml b/modules/nf-core/proteus/readproteingroups/environment.yml index bd44f38d..3ac338bc 100644 --- a/modules/nf-core/proteus/readproteingroups/environment.yml +++ b/modules/nf-core/proteus/readproteingroups/environment.yml @@ -1,3 +1,4 @@ +name: proteus_readproteingroups channels: - conda-forge - bioconda @@ -7,3 +8,4 @@ dependencies: - bioconda::r-proteus-bartongroup=0.2.16 - conda-forge::r-plotly=4.10.2 - bioconda::bioconductor-limma=3.54.0 + - conda-forge::r-ggplot2=3.4.4 diff --git a/modules/nf-core/proteus/readproteingroups/main.nf b/modules/nf-core/proteus/readproteingroups/main.nf index 34837410..d8e7c8a8 100644 --- a/modules/nf-core/proteus/readproteingroups/main.nf +++ b/modules/nf-core/proteus/readproteingroups/main.nf @@ -2,10 +2,10 @@ process PROTEUS_READPROTEINGROUPS { tag "$meta.id" label 'process_single' - conda 'modules/nf-core/proteus/readproteingroups/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-4e01206f2c47f56077f04e5d2d7b312f50513a1e:92abccefbeb09795ad6a93553b62a6ad3daaea48-0': - 'biocontainers/mulled-v2-4e01206f2c47f56077f04e5d2d7b312f50513a1e:92abccefbeb09795ad6a93553b62a6ad3daaea48-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-503e259d7d34ce533ce66c4c8871af4ab409db6d:1e504ef71c83943061a39b6260d826b988bfa56f-0': + 'biocontainers/mulled-v2-503e259d7d34ce533ce66c4c8871af4ab409db6d:1e504ef71c83943061a39b6260d826b988bfa56f-0' }" input: tuple val(meta), path(samplesheet), path(intensities) diff --git a/modules/nf-core/proteus/readproteingroups/meta.yml b/modules/nf-core/proteus/readproteingroups/meta.yml index 8034770a..4e67cf0c 100644 --- a/modules/nf-core/proteus/readproteingroups/meta.yml +++ b/modules/nf-core/proteus/readproteingroups/meta.yml @@ -11,7 +11,7 @@ tools: documentation: "https://rdrr.io/github/bartongroup/Proteus/" tool_dev_url: "https://github.com/bartongroup/Proteus" doi: "10.1101/416511" - licence: "['GPL v2']" + licence: ["GPL v2"] input: - meta: type: map diff --git a/modules/nf-core/proteus/readproteingroups/templates/proteus_readproteingroups.R b/modules/nf-core/proteus/readproteingroups/templates/proteus_readproteingroups.R index 5806971d..f1321714 100644 --- a/modules/nf-core/proteus/readproteingroups/templates/proteus_readproteingroups.R +++ b/modules/nf-core/proteus/readproteingroups/templates/proteus_readproteingroups.R @@ -54,10 +54,11 @@ parse_args <- function(x) { #' @param file Input file #' @param header Passed to read.delim() #' @param row.names Passed to read.delim() +#' @param nrows Passed to read.delim() #' #' @return output Data frame -read_delim_flexible <- function(file, header = TRUE, row.names = NULL, check.names = F) { +read_delim_flexible <- function(file, header = TRUE, row.names = NULL, check.names = F, nrows = -1) { ext <- tolower(tail(strsplit(basename(file), split = "\\\\.")[[1]], 1)) @@ -74,7 +75,8 @@ read_delim_flexible <- 
function(file, header = TRUE, row.names = NULL, check.nam sep = separator, header = header, row.names = row.names, - check.names = check.names + check.names = check.names, + nrows = nrows ) } @@ -192,7 +194,8 @@ library(proteus) intensities.table <- read_delim_flexible( file = opt\$intensities_file, - check.names = FALSE + check.names = FALSE, + nrows = 1 # Here, we are only interested in the header of the table ) sample.sheet <- @@ -218,16 +221,19 @@ sample.sheet\$condition <- sample.sheet[[opt\$contrast_variable]] measure.cols <- setNames(paste0(opt\$measure_col_prefix, sample.sheet[[opt\$sample_id_col]]), sample.sheet[[opt\$sample_id_col]]) +if (!any(measure.cols %in% colnames(intensities.table))) { + measure.cols <- setNames(paste0(paste0(opt\$measure_col_prefix, " "), sample.sheet[[opt\$sample_id_col]]), sample.sheet[[opt\$sample_id_col]]) +} + # Check that all samples specified in the input sheet are present in the intensities table -missing_columns <- paste0(opt\$measure_col_prefix, sample.sheet[[opt\$sample_id_col]]) -missing_columns <- missing_columns[!missing_columns %in% colnames(intensities.table)] +missing_columns <- measure.cols[!measure.cols %in% colnames(intensities.table)] if (length(missing_columns) > 0) { stop(paste( length(missing_columns), 'specified samples do not have a(n)', opt\$measure_col_prefix, - 'column in intensities table. The following columns are missing:', + 'column in intensities table (tried prefix both with and without adding a whitespace), please check the value of parameter --measure_col_prefix. 
The following columns are missing:', paste(missing_columns, collapse = ', ') )) } diff --git a/modules/nf-core/shinyngs/app/environment.yml b/modules/nf-core/shinyngs/app/environment.yml index 3bf455c0..0e6de401 100644 --- a/modules/nf-core/shinyngs/app/environment.yml +++ b/modules/nf-core/shinyngs/app/environment.yml @@ -1,6 +1,7 @@ +name: shinyngs_app channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::r-shinyngs=1.8.4 + - bioconda::r-shinyngs=1.8.8 diff --git a/modules/nf-core/shinyngs/app/main.nf b/modules/nf-core/shinyngs/app/main.nf index d3ce2ffa..ef05a863 100644 --- a/modules/nf-core/shinyngs/app/main.nf +++ b/modules/nf-core/shinyngs/app/main.nf @@ -13,10 +13,10 @@ process SHINYNGS_APP { // // Those values must then be set in your Nextflow secrets. - conda 'modules/nf-core/shinyngs/app/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.8.4--r43hdfd78af_0' : - 'biocontainers/r-shinyngs:1.8.4--r43hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.8.8--r43hdfd78af_0' : + 'biocontainers/r-shinyngs:1.8.8--r43hdfd78af_0' }" input: tuple val(meta), path(sample), path(feature_meta), path(assay_files) // Experiment-level info @@ -49,8 +49,23 @@ process SHINYNGS_APP { cat <<-END_VERSIONS > versions.yml "${task.process}": - r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') r-shinyngs: \$(Rscript -e "library(shinyngs); cat(as.character(packageVersion('shinyngs')))") END_VERSIONS """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: meta.id + + """ + mkdir -p $prefix + touch ${prefix}/data.rds + touch ${prefix}/app.R + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-shinyngs: \$(Rscript -e "library(shinyngs); cat(as.character(packageVersion('shinyngs')))") + END_VERSIONS + """ + } diff --git 
a/modules/nf-core/shinyngs/app/meta.yml b/modules/nf-core/shinyngs/app/meta.yml index e6ce81f8..8ff58e95 100644 --- a/modules/nf-core/shinyngs/app/meta.yml +++ b/modules/nf-core/shinyngs/app/meta.yml @@ -11,7 +11,7 @@ tools: homepage: "https://github.com/pinin4fjords/shinyngs" documentation: "https://rawgit.com/pinin4fjords/shinyngs/master/vignettes/shinyngs.html" tool_dev_url: "https://github.com/pinin4fjords/shinyngs" - licence: "['AGPL v3']" + licence: ["AGPL v3"] input: - meta: type: map diff --git a/modules/nf-core/shinyngs/app/tests/main.nf.test b/modules/nf-core/shinyngs/app/tests/main.nf.test new file mode 100644 index 00000000..39d6e27e --- /dev/null +++ b/modules/nf-core/shinyngs/app/tests/main.nf.test @@ -0,0 +1,133 @@ +nextflow_process { + + name "Test Process SHINYNGS_APP" + script "../main.nf" + process "SHINYNGS_APP" + + tag "modules" + tag "modules_nfcore" + tag "shinyngs" + tag "shinyngs/app" + + test("mouse - multi matrix") { + + config './nextflow.config' + + when { + process { + """ + expression_test_data_dir = params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/' + + expression_sample_sheet = file(expression_test_data_dir + 'SRP254919.samplesheet.csv', checkIfExists: true) + expression_feature_meta = file(expression_test_data_dir + 'SRP254919.gene_meta.tsv', checkIfExists: true) + raw_expression_matrix_file = file(expression_test_data_dir + 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true) + expression_contrasts = file(expression_test_data_dir + 'SRP254919.contrasts.csv', checkIfExists: true) + expression_differential = file(expression_test_data_dir + 'SRP254919.salmon.merged.deseq2.results.tsv', checkIfExists: true) + + // Copy some inputs for testing the multi-matrix functionality + raw_expression_matrix_file.copyTo('normalised.tsv') + normalised_expression_matrix_file = file('normalised.tsv') + expression_differential.copyTo('second_contrast_stats.tsv') + second_contrast_stats = 
file('second_contrast_stats.tsv') + + contrast_stats_assay = Channel.value(1) + + input[0] = [ [ "id":"SRP254919" ], expression_sample_sheet, expression_feature_meta, [ raw_expression_matrix_file, normalised_expression_matrix_file ] ] + input[1] = [ [ "id":"SRP254919" ], expression_contrasts, [ expression_differential, second_contrast_stats ] ] + input[2] = contrast_stats_assay + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.app[0][1]).name, + process.out.app[0][2], + process.out.versions + ).match() } + ) + } + } + + test("mouse - single matrix") { + + config './nextflow.config' + + when { + process { + """ + expression_test_data_dir = params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/' + + expression_sample_sheet = file(expression_test_data_dir + 'SRP254919.samplesheet.csv', checkIfExists: true) + expression_feature_meta = file(expression_test_data_dir + 'SRP254919.gene_meta.tsv', checkIfExists: true) + raw_expression_matrix_file = file(expression_test_data_dir + 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true) + expression_contrasts = file(expression_test_data_dir + 'SRP254919.contrasts.csv', checkIfExists: true) + expression_differential = file(expression_test_data_dir + 'SRP254919.salmon.merged.deseq2.results.tsv', checkIfExists: true) + + // Copy some inputs for testing the multi-matrix functionality + expression_differential.copyTo('second_contrast_stats.tsv') + second_contrast_stats = file('second_contrast_stats.tsv') + + contrast_stats_assay = Channel.value(1) + + input[0] = [ [ "id":"SRP254919" ], expression_sample_sheet, expression_feature_meta, [ raw_expression_matrix_file ] ] + input[1] = [ [ "id":"SRP254919" ], expression_contrasts, [ expression_differential, second_contrast_stats ] ] + input[2] = contrast_stats_assay + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.app[0][1]).name, 
+ process.out.app[0][2], + process.out.versions + ).match() } + ) + } + } + + test("mouse - stub") { + + options "-stub" + + when { + process { + """ + expression_test_data_dir = params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/' + + expression_sample_sheet = file(expression_test_data_dir + 'SRP254919.samplesheet.csv', checkIfExists: true) + expression_feature_meta = file(expression_test_data_dir + 'SRP254919.gene_meta.tsv', checkIfExists: true) + raw_expression_matrix_file = file(expression_test_data_dir + 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true) + expression_contrasts = file(expression_test_data_dir + 'SRP254919.contrasts.csv', checkIfExists: true) + expression_differential = file(expression_test_data_dir + 'SRP254919.salmon.merged.deseq2.results.tsv', checkIfExists: true) + + // Copy some inputs for testing the multi-matrix functionality + expression_differential.copyTo('second_contrast_stats.tsv') + second_contrast_stats = file('second_contrast_stats.tsv') + + contrast_stats_assay = Channel.value(1) + + input[0] = [ [ "id":"SRP254919" ], expression_sample_sheet, expression_feature_meta, [ raw_expression_matrix_file ] ] + input[1] = [ [ "id":"SRP254919" ], expression_contrasts, [ expression_differential, second_contrast_stats ] ] + input[2] = contrast_stats_assay + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.app[0][1]).name, + process.out.app[0][2], + process.out.versions + ).match() } + ) + } + } +} diff --git a/modules/nf-core/shinyngs/app/tests/main.nf.test.snap b/modules/nf-core/shinyngs/app/tests/main.nf.test.snap new file mode 100644 index 00000000..f87c17d7 --- /dev/null +++ b/modules/nf-core/shinyngs/app/tests/main.nf.test.snap @@ -0,0 +1,44 @@ +{ + "mouse - stub": { + "content": [ + "data.rds", + "app.R:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "versions.yml:md5,9a3135ae8ff362a9671b280dcc5781da" + ] + ], + "meta": { + "nf-test": 
"0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T08:47:11.758494" + }, + "mouse - multi matrix": { + "content": [ + "data.rds", + "app.R:md5,bedcfc45b6cdcc2b8fe3627987e2b17a", + [ + "versions.yml:md5,9a3135ae8ff362a9671b280dcc5781da" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T08:46:37.144273" + }, + "mouse - single matrix": { + "content": [ + "data.rds", + "app.R:md5,bedcfc45b6cdcc2b8fe3627987e2b17a", + [ + "versions.yml:md5,9a3135ae8ff362a9671b280dcc5781da" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T08:46:57.227288" + } +} \ No newline at end of file diff --git a/modules/nf-core/shinyngs/app/tests/nextflow.config b/modules/nf-core/shinyngs/app/tests/nextflow.config new file mode 100644 index 00000000..b5fffed1 --- /dev/null +++ b/modules/nf-core/shinyngs/app/tests/nextflow.config @@ -0,0 +1,14 @@ +process { + //withName: test_shinyngs_app:SHINYNGS_APP { + // secret 'SHINYAPPS_TOKEN' + // secret 'SHINYAPPS_SECRET' + //} + withName: test_shinyngs_app:SHINYNGS_APP { + ext.prefix = { "${meta.id}_test" } + ext.args = { "--contrast_stats_assay 1" } + } + withName: test_shinyngs_app_multi_matrix:SHINYNGS_APP { + ext.prefix = { "${meta.id}_test" } + ext.args = { "--assay-names raw,normalised --contrast_stats_assay 2" } + } +} diff --git a/modules/nf-core/shinyngs/app/tests/tags.yml b/modules/nf-core/shinyngs/app/tests/tags.yml new file mode 100644 index 00000000..07331846 --- /dev/null +++ b/modules/nf-core/shinyngs/app/tests/tags.yml @@ -0,0 +1,2 @@ +shinyngs/app: + - "modules/nf-core/shinyngs/app/**" diff --git a/modules/nf-core/shinyngs/staticdifferential/environment.yml b/modules/nf-core/shinyngs/staticdifferential/environment.yml index 3bf455c0..bec57084 100644 --- a/modules/nf-core/shinyngs/staticdifferential/environment.yml +++ b/modules/nf-core/shinyngs/staticdifferential/environment.yml @@ -1,6 +1,7 @@ +name: shinyngs_staticdifferential 
channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::r-shinyngs=1.8.4 + - bioconda::r-shinyngs=1.8.8 diff --git a/modules/nf-core/shinyngs/staticdifferential/main.nf b/modules/nf-core/shinyngs/staticdifferential/main.nf index 2166079f..c61ccb4a 100644 --- a/modules/nf-core/shinyngs/staticdifferential/main.nf +++ b/modules/nf-core/shinyngs/staticdifferential/main.nf @@ -2,10 +2,10 @@ process SHINYNGS_STATICDIFFERENTIAL { tag "$meta.id" label 'process_single' - conda 'modules/nf-core/shinyngs/staticdifferential/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.8.4--r43hdfd78af_0' : - 'biocontainers/r-shinyngs:1.8.4--r43hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.8.8--r43hdfd78af_0' : + 'biocontainers/r-shinyngs:1.8.8--r43hdfd78af_0' }" input: tuple val(meta), path(differential_result) // Differential info: contrast and differential stats diff --git a/modules/nf-core/shinyngs/staticdifferential/meta.yml b/modules/nf-core/shinyngs/staticdifferential/meta.yml index 9b92ccf7..f49ff70b 100644 --- a/modules/nf-core/shinyngs/staticdifferential/meta.yml +++ b/modules/nf-core/shinyngs/staticdifferential/meta.yml @@ -11,7 +11,7 @@ tools: homepage: "https://github.com/pinin4fjords/shinyngs" documentation: "https://rawgit.com/pinin4fjords/shinyngs/master/vignettes/shinyngs.html" tool_dev_url: "https://github.com/pinin4fjords/shinyngs" - licence: "['AGPL v3']" + licence: ["AGPL v3"] input: - meta: type: map diff --git a/modules/nf-core/shinyngs/staticexploratory/environment.yml b/modules/nf-core/shinyngs/staticexploratory/environment.yml index 3bf455c0..1c923f1b 100644 --- a/modules/nf-core/shinyngs/staticexploratory/environment.yml +++ b/modules/nf-core/shinyngs/staticexploratory/environment.yml @@ -1,6 +1,7 @@ +name: shinyngs_staticexploratory channels: - 
conda-forge - bioconda - defaults dependencies: - - bioconda::r-shinyngs=1.8.4 + - bioconda::r-shinyngs=1.8.8 diff --git a/modules/nf-core/shinyngs/staticexploratory/main.nf b/modules/nf-core/shinyngs/staticexploratory/main.nf index 4880095e..1a3104b3 100644 --- a/modules/nf-core/shinyngs/staticexploratory/main.nf +++ b/modules/nf-core/shinyngs/staticexploratory/main.nf @@ -2,10 +2,10 @@ process SHINYNGS_STATICEXPLORATORY { tag "$meta.id" label 'process_single' - conda 'modules/nf-core/shinyngs/staticexploratory/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.8.4--r43hdfd78af_0' : - 'biocontainers/r-shinyngs:1.8.4--r43hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.8.8--r43hdfd78af_0' : + 'biocontainers/r-shinyngs:1.8.8--r43hdfd78af_0' }" input: tuple val(meta), path(sample), path(feature_meta), path(assay_files) @@ -43,7 +43,29 @@ process SHINYNGS_STATICEXPLORATORY { cat <<-END_VERSIONS > versions.yml "${task.process}": - r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + r-shinyngs: \$(Rscript -e "library(shinyngs); cat(as.character(packageVersion('shinyngs')))") + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: meta.id + """ + mkdir -p ${prefix}/png ${prefix}/html + touch ${prefix}/png/boxplot.png + touch ${prefix}/html/boxplot.html + touch ${prefix}/png/density.png + touch ${prefix}/html/density.html + touch ${prefix}/png/pca2d.png + touch ${prefix}/html/pca3d.html + touch ${prefix}/png/pca3d.png + touch ${prefix}/html/pca2d.html + touch ${prefix}/png/mad_correlation.png + touch ${prefix}/html/mad_correlation.html + touch ${prefix}/png/sample_dendrogram.png + + cat <<-END_VERSIONS > versions.yml + "${task.process}": r-shinyngs: \$(Rscript -e "library(shinyngs); 
cat(as.character(packageVersion('shinyngs')))") END_VERSIONS """ diff --git a/modules/nf-core/shinyngs/staticexploratory/meta.yml b/modules/nf-core/shinyngs/staticexploratory/meta.yml index 7bb6a61b..5a1b49b2 100644 --- a/modules/nf-core/shinyngs/staticexploratory/meta.yml +++ b/modules/nf-core/shinyngs/staticexploratory/meta.yml @@ -12,7 +12,7 @@ tools: homepage: "https://github.com/pinin4fjords/shinyngs" documentation: "https://rawgit.com/pinin4fjords/shinyngs/master/vignettes/shinyngs.html" tool_dev_url: "https://github.com/pinin4fjords/shinyngs" - licence: "['AGPL v3']" + licence: ["AGPL v3"] input: - meta: type: map diff --git a/modules/nf-core/shinyngs/staticexploratory/tests/main.nf.test b/modules/nf-core/shinyngs/staticexploratory/tests/main.nf.test new file mode 100644 index 00000000..3338aba1 --- /dev/null +++ b/modules/nf-core/shinyngs/staticexploratory/tests/main.nf.test @@ -0,0 +1,154 @@ +nextflow_process { + + name "Test Process SHINYNGS_STATICEXPLORATORY" + script "../main.nf" + process "SHINYNGS_STATICEXPLORATORY" + + tag "modules" + tag "modules_nfcore" + tag "shinyngs" + tag "shinyngs/staticexploratory" + + test("mouse - defaults") { + + when { + process { + """ + expression_test_data_dir = params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/' + + expression_sample_sheet = file(expression_test_data_dir + 'SRP254919.samplesheet.csv', checkIfExists: true) + expression_feature_meta = file(expression_test_data_dir + 'SRP254919.gene_meta.tsv', checkIfExists: true) + expression_matrix_file = file(expression_test_data_dir + 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true) + + input[0] = [ [ "id":"treatment" ], expression_sample_sheet, expression_feature_meta, [ expression_matrix_file ] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.boxplots_png[0][1]).name, + file(process.out.densities_png[0][1]).name, + file(process.out.mad_png[0][1]).name, 
+ file(process.out.pca2d_png[0][1]).name, + file(process.out.pca3d_png[0][1]).name, + file(process.out.dendro[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("mouse - defaults - stub") { + + options "-stub" + + when { + process { + """ + expression_test_data_dir = params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/' + + expression_sample_sheet = file(expression_test_data_dir + 'SRP254919.samplesheet.csv', checkIfExists: true) + expression_feature_meta = file(expression_test_data_dir + 'SRP254919.gene_meta.tsv', checkIfExists: true) + expression_matrix_file = file(expression_test_data_dir + 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true) + + input[0] = [ [ "id":"treatment" ], expression_sample_sheet, expression_feature_meta, [ expression_matrix_file ] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.boxplots_png[0][1]).name, + file(process.out.densities_png[0][1]).name, + file(process.out.mad_png[0][1]).name, + file(process.out.pca2d_png[0][1]).name, + file(process.out.pca3d_png[0][1]).name, + file(process.out.dendro[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("mouse - specify log") { + + config './nextflow_specify_log.conf' + + when { + process { + """ + expression_test_data_dir = params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/' + + expression_sample_sheet = file(expression_test_data_dir + 'SRP254919.samplesheet.csv', checkIfExists: true) + expression_feature_meta = file(expression_test_data_dir + 'SRP254919.gene_meta.tsv', checkIfExists: true) + expression_matrix_file = file(expression_test_data_dir + 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true) + + input[0] = [ [ "id":"treatment" ], expression_sample_sheet, expression_feature_meta, [ expression_matrix_file ] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot( + file(process.out.boxplots_png[0][1]).name, + file(process.out.densities_png[0][1]).name, + file(process.out.mad_png[0][1]).name, + file(process.out.pca2d_png[0][1]).name, + file(process.out.pca3d_png[0][1]).name, + file(process.out.dendro[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("mouse - html") { + + config './nextflow_html.conf' + + when { + process { + """ + expression_test_data_dir = params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/' + + expression_sample_sheet = file(expression_test_data_dir + 'SRP254919.samplesheet.csv', checkIfExists: true) + expression_feature_meta = file(expression_test_data_dir + 'SRP254919.gene_meta.tsv', checkIfExists: true) + expression_matrix_file = file(expression_test_data_dir + 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true) + + input[0] = [ [ "id":"treatment" ], expression_sample_sheet, expression_feature_meta, [ expression_matrix_file ] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.boxplots_png[0][1]).name, + file(process.out.densities_png[0][1]).name, + file(process.out.mad_png[0][1]).name, + file(process.out.pca2d_png[0][1]).name, + file(process.out.pca3d_png[0][1]).name, + file(process.out.dendro[0][1]).name, + path(process.out.boxplots_html[0][1]).readLines().contains('ENSMUSG00000027456","Gm37080'), + path(process.out.densities_html[0][1]).readLines().contains('-1.4916353753463203,-1.3078530974016225,-1.1240708194569247,-0.94028854151222707'), + path(process.out.mad_html[0][1]).readLines().contains('0,-0.74295280067699376,0.67449075947659531,-0.6744907594765952'), + path(process.out.pca2d_html[0][1]).readLines().contains('SRX8042381","SRX8042382'), + path(process.out.pca3d_html[0][1]).readLines().contains('SRX8042381","SRX8042382'), + process.out.versions + ).match() } + ) + } + + } +} diff --git a/modules/nf-core/shinyngs/staticexploratory/tests/main.nf.test.snap 
b/modules/nf-core/shinyngs/staticexploratory/tests/main.nf.test.snap new file mode 100644 index 00000000..b95d031c --- /dev/null +++ b/modules/nf-core/shinyngs/staticexploratory/tests/main.nf.test.snap @@ -0,0 +1,79 @@ +{ + "mouse - defaults - stub": { + "content": [ + "boxplot.png", + "density.png", + "mad_correlation.png", + "pca2d.png", + "pca3d.png", + "sample_dendrogram.png", + [ + "versions.yml:md5,526fbe61b95ad3a722d7470ca1874ca3" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T08:48:20.908769" + }, + "mouse - defaults": { + "content": [ + "boxplot.png", + "density.png", + "mad_correlation.png", + "pca2d.png", + "pca3d.png", + "sample_dendrogram.png", + [ + "versions.yml:md5,526fbe61b95ad3a722d7470ca1874ca3" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T08:48:06.589763" + }, + "mouse - specify log": { + "content": [ + "boxplot.png", + "density.png", + "mad_correlation.png", + "pca2d.png", + "pca3d.png", + "sample_dendrogram.png", + [ + "versions.yml:md5,526fbe61b95ad3a722d7470ca1874ca3" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T08:48:41.352789" + }, + "mouse - html": { + "content": [ + "boxplot.png", + "density.png", + "mad_correlation.png", + "pca2d.png", + "pca3d.png", + "sample_dendrogram.png", + false, + false, + false, + false, + false, + [ + "versions.yml:md5,526fbe61b95ad3a722d7470ca1874ca3" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T08:49:04.969108" + } +} \ No newline at end of file diff --git a/modules/nf-core/shinyngs/staticexploratory/tests/nextflow.config b/modules/nf-core/shinyngs/staticexploratory/tests/nextflow.config new file mode 100644 index 00000000..399ac590 --- /dev/null +++ b/modules/nf-core/shinyngs/staticexploratory/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: 
'test_shinyngs_staticexploratory_specify_log:SHINYNGS_STATICEXPLORATORY' { + ext.args = { "--log2_assays '1'" } + } + withName: 'test_shinyngs_staticexploratory_html:SHINYNGS_STATICEXPLORATORY' { + ext.args = { "--write_html" } + } +} \ No newline at end of file diff --git a/modules/nf-core/shinyngs/staticexploratory/tests/nextflow_html.conf b/modules/nf-core/shinyngs/staticexploratory/tests/nextflow_html.conf new file mode 100644 index 00000000..b7a293a4 --- /dev/null +++ b/modules/nf-core/shinyngs/staticexploratory/tests/nextflow_html.conf @@ -0,0 +1,5 @@ +process { + withName: 'SHINYNGS_STATICEXPLORATORY' { + ext.args = { "--write_html" } + } +} diff --git a/modules/nf-core/shinyngs/staticexploratory/tests/nextflow_specify_log.conf b/modules/nf-core/shinyngs/staticexploratory/tests/nextflow_specify_log.conf new file mode 100644 index 00000000..0872df66 --- /dev/null +++ b/modules/nf-core/shinyngs/staticexploratory/tests/nextflow_specify_log.conf @@ -0,0 +1,5 @@ +process { + withName: 'SHINYNGS_STATICEXPLORATORY' { + ext.args = { "--log2_assays '1'" } + } +} diff --git a/modules/nf-core/shinyngs/staticexploratory/tests/tags.yml b/modules/nf-core/shinyngs/staticexploratory/tests/tags.yml new file mode 100644 index 00000000..8e507165 --- /dev/null +++ b/modules/nf-core/shinyngs/staticexploratory/tests/tags.yml @@ -0,0 +1,2 @@ +shinyngs/staticexploratory: + - "modules/nf-core/shinyngs/staticexploratory/**" diff --git a/modules/nf-core/shinyngs/validatefomcomponents/environment.yml b/modules/nf-core/shinyngs/validatefomcomponents/environment.yml index 3bf455c0..4f3067bc 100644 --- a/modules/nf-core/shinyngs/validatefomcomponents/environment.yml +++ b/modules/nf-core/shinyngs/validatefomcomponents/environment.yml @@ -1,6 +1,7 @@ +name: shinyngs_validatefomcomponents channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::r-shinyngs=1.8.4 + - bioconda::r-shinyngs=1.8.8 diff --git a/modules/nf-core/shinyngs/validatefomcomponents/main.nf 
b/modules/nf-core/shinyngs/validatefomcomponents/main.nf index 68943f1e..fad3948a 100644 --- a/modules/nf-core/shinyngs/validatefomcomponents/main.nf +++ b/modules/nf-core/shinyngs/validatefomcomponents/main.nf @@ -2,10 +2,10 @@ process SHINYNGS_VALIDATEFOMCOMPONENTS { tag "$sample" label 'process_single' - conda 'modules/nf-core/shinyngs/validatefomcomponents/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.8.4--r43hdfd78af_0' : - 'biocontainers/r-shinyngs:1.8.4--r43hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.8.8--r43hdfd78af_0' : + 'biocontainers/r-shinyngs:1.8.8--r43hdfd78af_0' }" input: tuple val(meta), path(sample), path(assay_files) diff --git a/modules/nf-core/shinyngs/validatefomcomponents/meta.yml b/modules/nf-core/shinyngs/validatefomcomponents/meta.yml index fe1672a0..9066ada4 100644 --- a/modules/nf-core/shinyngs/validatefomcomponents/meta.yml +++ b/modules/nf-core/shinyngs/validatefomcomponents/meta.yml @@ -11,7 +11,7 @@ tools: homepage: "https://github.com/pinin4fjords/shinyngs" documentation: "https://rawgit.com/pinin4fjords/shinyngs/master/vignettes/shinyngs.html" tool_dev_url: "https://github.com/pinin4fjords/shinyngs" - licence: "['AGPL v3']" + licence: ["AGPL v3"] input: - meta: type: map diff --git a/nextflow.config b/nextflow.config index 5c678174..6390c9bf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -22,19 +22,21 @@ params { sizefactors_from_controls = false // Reporting - logo_file = "${projectDir}/docs/images/nf-core-differentialabundance_logo_light.png" - css_file = "${projectDir}/assets/nf-core_style.css" - citations_file = "${projectDir}/CITATIONS.md" - report_file = "${projectDir}/assets/differentialabundance_report.Rmd" - report_title = null - report_author = null - report_description = null - report_scree = true + logo_file = 
"$projectDir/docs/images/nf-core-differentialabundance_logo_light.png" + css_file = "$projectDir/assets/nf-core_style.css" + citations_file = "$projectDir/CITATIONS.md" + report_file = "$projectDir/assets/differentialabundance_report.Rmd" + report_title = null + report_author = null + report_contributors = null + report_description = null + report_scree = true + report_round_digits = 4 // Sample sheet options observations_type = 'sample' observations_id_col = 'sample' - observations_name_col = 'sample' + observations_name_col = null // Feature options features = null @@ -42,7 +44,6 @@ params { features_id_col = 'gene_id' features_name_col = 'gene_name' features_metadata_cols = 'gene_id,gene_name,gene_biotype' - features_log2_assays = null // GTF parsing options features_gtf_feature_type = 'transcript' @@ -61,18 +62,19 @@ params { affy_build_annotation = true // Proteus-specific options - proteus_measurecol_prefix = 'LFQ intensity ' + proteus_measurecol_prefix = 'LFQ intensity' proteus_norm_function = 'normalizeMedian' proteus_plotsd_method = 'violin' proteus_plotmv_loess = true proteus_palette_name = 'Set1' - proteus_round_digits = -1 // Filtering options - filtering_min_samples = 1 - filtering_min_abundance = 1 - filtering_min_proportion = null - filtering_grouping_var = null + filtering_min_samples = 1.0 + filtering_min_abundance = 1.0 + filtering_min_proportion = null + filtering_grouping_var = null + filtering_min_proportion_not_na = 0.5 + filtering_min_samples_not_na = null // Exploratory options exploratory_main_variable = 'auto_pca' @@ -83,17 +85,18 @@ params { exploratory_mad_threshold = -5 exploratory_assay_names = "raw,normalised,variance_stabilised" exploratory_final_assay = "variance_stabilised" + exploratory_log2_assays = 'raw,normalised' exploratory_palette_name = 'Set1' // Differential options - differential_file_suffix = ".deseq2.results.tsv" + differential_file_suffix = null differential_feature_id_column = "gene_id" 
differential_feature_name_column = "gene_name" differential_fc_column = "log2FoldChange" differential_pval_column = "pvalue" differential_qval_column = "padj" - differential_min_fold_change = 2 - differential_max_pval = 1 + differential_min_fold_change = 2.0 + differential_max_pval = 1.0 differential_max_qval = 0.05 differential_foldchanges_logged = true differential_palette_name = 'Set1' @@ -129,7 +132,7 @@ params { limma_robust = false limma_winsor_tail_p = '0.05,0.1' limma_adjust_method = "BH" - limma_p_value = 1 + limma_p_value = 1.0 limma_lfc = 0 limma_confint = false @@ -155,7 +158,21 @@ params { gsea_save_rnd_lists = false gsea_zip_report = false - gsea_gene_sets = null + // gprofiler2 options + gprofiler2_run = false + gprofiler2_organism = null + gprofiler2_significant = true + gprofiler2_measure_underrepresentation = false + gprofiler2_correction_method = 'gSCS' + gprofiler2_sources = null + gprofiler2_evcodes = false + gprofiler2_max_qval = 0.05 + gprofiler2_token = null + gprofiler2_background_file = 'auto' + gprofiler2_background_column = null + gprofiler2_domain_scope = 'annotated' + gprofiler2_min_diff = 1 + gprofiler2_palette_name = 'Blues' // ShinyNGS shinyngs_build_app = true @@ -168,9 +185,12 @@ params { shinyngs_shinyapps_account = null shinyngs_shinyapps_app_name = null + // Gene set options + gene_sets_files = null + // References genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false // Boilerplate options @@ -191,7 +211,6 @@ params { custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" config_profile_contact = null config_profile_url = null - // Max resource options // Defaults only, expecting to be overwritten @@ -219,7 +238,7 @@ try { } // Load nf-core/differentialabundance custom profiles from different institutions. 
-// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! +// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs! // try { // includeConfig "${params.custom_config_base}/pipeline/differentialabundance.config" // } catch (Exception e) { @@ -230,6 +249,7 @@ profiles { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' cleanup = false + nextflow.enable.configProcessNamesValidation = true } conda { conda.enabled = true @@ -239,6 +259,7 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false + channels = ['conda-forge', 'bioconda', 'defaults'] apptainer.enabled = false } mamba { @@ -254,16 +275,16 @@ profiles { } docker { docker.enabled = true - docker.userEmulation = true conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true @@ -321,6 +342,7 @@ profiles { test_nogtf { includeConfig 'conf/test_nogtf.config' } test_full { includeConfig 'conf/test_full.config' } affy { includeConfig 'conf/affy.config' } + maxquant { includeConfig 'conf/maxquant.config' } rnaseq { includeConfig 'conf/rnaseq.config' } soft {includeConfig 'conf/soft.config'} test_affy { includeConfig 'conf/test_affy.config' } @@ -338,7 +360,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Load igenomes.config if required @@ -361,6 +383,9 @@ env { // Capture exit codes from upstream processes when 
piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true @@ -386,7 +411,7 @@ manifest { description = 'Differential abundance analysis' mainScript = 'main.nf' nextflowVersion = '!>=23.10.0' - version = '1.4.0' + version = '1.5.0' doi = '10.5281/zenodo.7568000' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 3011e2c9..7e329647 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -80,18 +80,18 @@ "type": "string", "fa_icon": "fas fa-border-all", "description": "(RNA-seq only): optional transcript length matrix with samples and genes as the abundance matrix", - "help_text": "if provided, this file willl be used to provide transcript lengths to DESeq2 to model length bias across samples" + "help_text": "If provided, this file will be used to provide transcript lengths to DESeq2 to model length bias across samples" }, "affy_cel_files_archive": { "type": "string", - "default": "None", + "default": "null", "description": "Alternative to matrix: a compressed CEL files archive such as often found in GEO", "fa_icon": "fas fa-file-archive", "help_text": "Use this option to provide a raw archive of CEL files from Affymetrix arrays. Will be ignored if a matrix is specified." }, "querygse": { "type": "string", - "default": "None", + "default": "null", "description": "Use SOFT files from GEO by providing the GSE study identifier", "fa_icon": "fas fa-keyboard", "help_text": "Use this option to provide a GSE study identifier." @@ -119,8 +119,7 @@ }, "observations_name_col": { "type": "string", - "default": "sample", - "description": "Column in the sample sheet to be used as the display identifier for observations", + "description": "Column in the sample sheet to be used as the display identifier for observations.
If unset, will use value of --observations_id_col.", "fa_icon": "fas fa-file-signature" } }, @@ -137,13 +136,13 @@ "features_id_col": { "type": "string", "default": "gene_id", - "description": "Feature ID attribute in the GTF file (e.g. the gene_id field)", + "description": "Feature ID attribute in the abundance table as well as in the GTF file (e.g. the gene_id field)", "fa_icon": "fas fa-address-card" }, "features_name_col": { "type": "string", "default": "gene_name", - "description": "Feature name attribute in the GTF file (e.g. the gene symbol field)", + "description": "Feature name attribute in the abundance table as well as in the GTF file (e.g. the gene symbol field)", "fa_icon": "fas fa-signature" }, "features_type": { @@ -191,11 +190,6 @@ "default": "gene_id", "description": "Where a GTF file is supplied, which field should go first in the converted output table", "fa_icon": "fas fa-fast-backward" - }, - "features_log2_assays": { - "type": "string", - "description": "Of which assays to compute the log2. Not necessary for maxquant data as this is controlled by the pipeline.", - "help_text": "Either comma-separated of assay positions, e.g. '[1,2,3]', or empty list '[]' to not log any assay. If not set, will guess which assays need to be logged (those with a maximum > 20)." } }, "required": ["features_id_col", "features_name_col", "features_type"], @@ -233,7 +227,7 @@ }, "affy_cdfname": { "type": "string", - "default": "None", + "default": "null", "description": "Used to specify the name of an alternative cdf package. If set to NULL, then the usual cdf package based on Affymetrix' mappings will be used.", "fa_icon": "fas fa-signature" }, @@ -269,8 +263,9 @@ "properties": { "proteus_measurecol_prefix": { "type": "string", - "default": "LFQ intensity ", - "description": "Prefix of the column names of the MaxQuant proteingroups table in which the intensity values are saved; the prefix has to be followed by the sample names that are also found in the samplesheet. 
Default: 'LFQ intensity '; take care to also consider trailing whitespace between prefix and samplenames." + "default": "LFQ intensity", + "description": "Prefix of the column names of the MaxQuant proteingroups table in which the intensity values are saved; the prefix has to be followed by the sample names that are also found in the samplesheet. Default: 'LFQ intensity'; will search for both the prefix as entered and the prefix followed by one whitespace.", + "help_text": "If the sample columns are e.g. called 'LFQ intensity sample1', 'LFQ intensity sample2' etc., please set this parameter to 'LFQ intensity'." }, "proteus_norm_function": { "type": "string", @@ -297,13 +292,9 @@ "help_text": "Check the content of `RColorBrewer::brewer.pal.info` from an R terminal for valid palette names.", "description": "Valid R palette name", "fa_icon": "fas fa-palette" - }, - "proteus_round_digits": { - "type": "number", - "default": -1.0, - "description": "Number of decimals to round the MaxQuant intensities to; default: -1 (will not round)." } - } + }, + "fa_icon": "fas fa-table" }, "filtering": { "title": "Filtering", @@ -333,6 +324,17 @@ "help_text": "The variable can be used to define groups and derive a minimum group size upon which to base minimum observation numbers. The rationale for this is to allow retention of features that might be present in only one group. Note that this is consciously NOT filtering with an explicit awareness of groups (\"feature must be present in all samples of group A\"), since this is known to create biases towards discovery of differential features.", "description": "An optional grouping variable to be used to calculate a min_samples value", "fa_icon": "fas fa-users" + }, + "filtering_min_proportion_not_na": { + "type": "number", + "default": 0.5, + "description": "A minimum proportion of observations, given as a number between 0 and 1, that must have a value (not NA) to retain the row/ feature (e.g. 
gene).", + "fa_icon": "fas fa-compress-alt" + }, + "filtering_min_samples_not_na": { + "type": "number", + "description": "Minimum observations that must have a value (not NA) to retain the row/ feature (e.g. gene). Overrides filtering_min_proportion_not_na.", + "fa_icon": "fas fa-percent" } }, "fa_icon": "fas fa-filter", @@ -386,7 +388,7 @@ "type": "string", "default": "raw,normalised,variance_stabilised", "hidden": true, - "description": "Specifies assay names to be used for matrices, platform-specific", + "description": "Specifies assay names to be used for matrices, platform-specific.", "fa_icon": "fas fa-file-signature" }, "exploratory_final_assay": { @@ -396,6 +398,11 @@ "description": "Specifies final assay to be used for exploratory analysis, platform-specific", "fa_icon": "fas fa-sort-down" }, + "exploratory_log2_assays": { + "type": "string", + "description": "Of which assays to compute the log2 during exploratory analysis. Not necessary for maxquant data as this is controlled by the pipeline.", + "help_text": "Either comma-separated of assay positions, e.g. '[1,2,3]', or empty list '[]' to not log any assay. If not set, will guess which assays need to be logged (those with a maximum > 20)." + }, "exploratory_palette_name": { "type": "string", "default": "Set1", @@ -421,8 +428,7 @@ "properties": { "differential_file_suffix": { "type": "string", - "default": ".deseq2.results.tsv", - "description": "The suffix associated tabular differential results tables", + "description": "Advanced option: the suffix associated tabular differential results tables. 
Will by default use the appropriate suffix according to the study_type.", "fa_icon": "fas fa-signature" }, "differential_feature_id_column": { @@ -493,7 +499,6 @@ } }, "required": [ - "differential_file_suffix", "differential_feature_id_column", "differential_fc_column", "differential_qval_column", @@ -572,7 +577,7 @@ "type": "string", "default": "BH", "description": "`pAdjustMethod` parameter passed to results()", - "help_text": "the method to use for adjusting p-values, see help in R for the p.adjust() function (via ?p.adjust). At time of writing available values were \"holm\", \"hochberg\", \"hommel\", \"bonferroni\", \"BH\", \"BY\", \"fdr\", \"none\".", + "help_text": "the method to use for adjusting p-values, see help in R for the p.adjust() function (via ?p.adjust). At time of writing available values were \"holm\", \"hochberg\", \"hommel\", \"bonferroni\", \"BH\", \"BY\", \"fdr\", \"none\".", "fa_icon": "fas fa-sliders-h" }, "deseq2_alpha": { @@ -641,19 +646,19 @@ }, "limma_spacing": { "type": "string", - "default": "None", + "default": "null", "fa_icon": "fas fa-people-arrows", "description": "passed to lmFit(), positive integer giving the spacing between duplicate occurrences of the same probe, spacing=1 for consecutive rows."
}, "limma_block": { "type": "string", - "default": "None", + "default": "null", "fa_icon": "fas fa-cube", "description": "Sample sheet column to be used to derive a vector or factor specifying a blocking variable on the arrays" }, "limma_correlation": { "type": "string", - "default": "None", + "default": "null", "fa_icon": "fas fa-chart-line", "description": "passed to lmFit(), the inter-duplicate or inter-technical replicate correlation" }, @@ -710,7 +715,7 @@ "type": "string", "default": "BH", "description": "passed to topTable(), method used to adjust the p-values for multiple testing.", - "enum": ["holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"], + "enum": ["holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"], "fa_icon": "fas fa-hammer" }, "limma_p_value": { @@ -798,8 +803,8 @@ "type": "string", "default": "meandiv", "description": "Normalisation mode", - "enum": ["meandiv", "none"], - "help_text": "Normalization mode. Method used to normalize the enrichment scores across analyzed gene sets: 'meandiv' (default, GSEA normalizes the enrichment scores as described in Normalized Enrichment Score (NES)) OR 'none' (GSEA does not normalize the enrichment scores).\n\nSee 'advanced fields' at https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideFrame.html?Run_GSEA_Page", + "enum": ["meandiv", "none"], + "help_text": "Normalization mode. Method used to normalize the enrichment scores across analyzed gene sets: 'meandiv' (default, GSEA normalizes the enrichment scores as described in Normalized Enrichment Score (NES)) OR 'none' (GSEA does not normalize the enrichment scores).\n\nSee 'advanced fields' at https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideFrame.html?Run_GSEA_Page", "fa_icon": "fas fa-align-justify" }, "gsea_rnd_type": { @@ -854,12 +859,93 @@ "description": "Make a zipped file with all reports", "help_text": "Set to True (default=false) to create a zip file of the analysis results.
The zip file is saved to the output folder with all of the other files generated by the analysis. This is useful for sharing analysis results", "fa_icon": "fas fa-file-archive" + } + }, + "fa_icon": "fas fa-layer-group" + }, + "gprofiler2": { + "title": "gprofiler2", + "type": "object", + "description": "", + "default": "", + "properties": { + "gprofiler2_run": { + "type": "boolean", + "description": "Set to run gprofiler2 and do a pathway enrichment analysis.", + "fa_icon": "fas fa-running" }, - "gsea_gene_sets": { + "gprofiler2_organism": { "type": "string", - "default": "None", - "description": "Gene sets in GMT or GMX-format (multiple comma-separated input files are possible)", - "fa_icon": "fas fa-bars" + "description": "Short name of the organism that is analyzed, e.g. hsapiens for homo sapiens.", + "help_text": "Set this to the short organism name consisting of the first letter of the genus and the full species name, e.g. hsapiens for Homo sapiens, mmusculus for Mus musculus. This has second priority and will be overridden by --gprofiler2_token." + }, + "gprofiler2_significant": { + "type": "boolean", + "default": true, + "description": "Should only significant enrichment results be considered?", + "help_text": "Default true; if false, will consider all enrichment results regardless of significance." + }, + "gprofiler2_measure_underrepresentation": { + "type": "boolean", + "default": false, + "description": "Should underrepresentation be measured instead of overrepresentation?", + "help_text": "Default false; if true, will measure underrepresentation."
+ }, + "gprofiler2_correction_method": { + "type": "string", + "description": "The method that should be used for multiple testing correction.", + "help_text": "One of gSCS (synonyms: analytical, g_SCS), fdr (synonyms: false_discovery_rate), bonferroni.", + "enum": ["gSCS", "analytical", "g_SCS", "fdr", "false_discovery_rate", "bonferroni"] + }, + "gprofiler2_sources": { + "type": "string", + "description": "On which source databases to run the gprofiler query", + "help_text": "GO, GO:MF, GO:BP, GO:CC, KEGG, REAC, WP, TF, MIRNA, HPA, CORUM, HP, or any comma-separated combination thereof, e.g. 'KEGG,REAC'. This works if --gprofiler2_organism is used; if a GMT file is provided with --gene_sets_files, should also work; the module will then remove any lines not starting with any of the source names. Does not work for --gprofiler2_token as g:Profiler will not filter such a run." + }, + "gprofiler2_evcodes": { + "type": "boolean", + "default": false, + "description": "Whether to include evcodes in the results.", + "help_text": "This can decrease performance and make the query slower. See https://rdrr.io/cran/gprofiler2/man/gost.html" + }, + "gprofiler2_max_qval": { + "type": "number", + "default": 0.05, + "description": "Maximum q value used for significance testing." + }, + "gprofiler2_token": { + "type": "string", + "description": "Token that should be used as a query.", + "help_text": "For reproducibility, instead of querying the online databases, you can provide a token, e.g. from a previous pipeline run or from a manual query on https://biit.cs.ut.ee/gprofiler/gost. This has highest priority and will override --gprofiler2_organism and --gene_sets_files."
+ }, + "gprofiler2_background_file": { + "type": "string", + "pattern": "^\\S+\\.(csv|tsv|txt)$|auto|false", + "description": "Path to CSV/TSV/TXT file that should be used as a background for the query; alternatively, 'auto' (default) or 'false'.", + "help_text": "It is advisable to run pathway analysis with a set of background genes describing which genes exist in the target organism in the first place so that other genes are not at all considered. This parameter is by default set to 'auto', meaning that the filtered input abundance matrix will be used. Alternatively, you can provide a CSV/TSV table where one column contains gene IDs and the other rows contain abundance values, or a TXT file that simply contains one gene ID per line. If a custom CSV/TSV is used, all genes will be considered which had at least some abundance (i.e. sum of all abundance values in a row > 0). Set to 'false' if you do not want to use a background." + }, + "gprofiler2_background_column": { + "type": "string", + "description": "Which column to use as gene IDs in the background matrix.", + "help_text": "If a background matrix is provided but this parameter is not set, will assume that the first matrix column contains the IDs." + }, + "gprofiler2_domain_scope": { + "type": "string", + "default": "annotated", + "description": "How to calculate the statistical domain size.", + "help_text": "One of annotated (default), known, custom or custom_annotated; see https://rdrr.io/cran/gprofiler2/man/gost.html", + "enum": ["annotated", "known", "custom", "custom_annotated"] + }, + "gprofiler2_min_diff": { + "type": "integer", + "default": 1, + "description": "How many genes must be differentially expressed in a pathway for it to be considered enriched? Default 1." + }, + "gprofiler2_palette_name": { + "type": "string", + "default": "Blues", + "description": "Valid R palette name", + "help_text": "Check the content of `RColorBrewer::brewer.pal.info` from an R terminal for valid palette names." 
} }, "fa_icon": "fas fa-layer-group" @@ -884,13 +970,13 @@ }, "shinyngs_shinyapps_account": { "type": "string", - "default": "None", + "default": "null", "description": "Your shinyapps.io account name", "fa_icon": "fas fa-user" }, "shinyngs_shinyapps_app_name": { "type": "string", - "default": "None", + "default": "null", "description": "The name of the app to push to in your shinyapps.io account", "fa_icon": "fas fa-file-signature" }, @@ -903,6 +989,20 @@ }, "fa_icon": "fab fa-app-store-ios" }, + "gene_set_options": { + "title": "Options related to gene set analysis", + "type": "object", + "fa_icon": "fas fa-cogs", + "description": "Files and options used by gene set analysis modules.", + "properties": { + "gene_sets_files": { + "type": "string", + "default": "null", + "description": "Gene sets in GMT or GMX-format; for GSEA: multiple comma-separated input files in either format are possible. For gprofiler2: A single file in GMT format is possible; this has lowest priority and will be overridden by --gprofiler2_token and --gprofiler2_organism.", + "fa_icon": "fas fa-bars" + } + } + }, "reporting_options": { "title": "Reporting options", "type": "object", @@ -911,7 +1011,9 @@ "properties": { "report_file": { "type": "string", + "default": "${projectDir}/assets/differentialabundance_report.Rmd", "description": "Rmd report template from which to create the pipeline report", + "help_text": "The pipeline will always generate a default report which gives a good overview of the analysis results. 
Should this default report not suit your needs, you can provide the path to a custom report instead.", "format": "file-path", "pattern": "^\\S+\\.Rmd$", "fa_icon": "fas fa-book" @@ -925,37 +1027,42 @@ }, "logo_file": { "type": "string", - "default": "docs/images/nf-core-differentialabundance_logo_light.png", + "default": "${projectDir}/docs/images/nf-core-differentialabundance_logo_light.png", "description": "A logo to display in the report instead of the generic pipeline logo", "fa_icon": "far fa-font-awesome-logo-full" }, "css_file": { "type": "string", - "default": "assets/nf-core_style.css", + "default": "${projectDir}/assets/nf-core_style.css", "description": "CSS to use to style the output, in lieu of the default nf-core styling", "fa_icon": "far fa-file-code" }, "citations_file": { "type": "string", - "default": "CITATIONS.md", + "default": "${projectDir}/CITATIONS.md", "description": "A markdown file containing citations to include in the fiinal report", "fa_icon": "fas fa-ad" }, "report_title": { "type": "string", - "default": "None", + "default": "null", "fa_icon": "fas fa-heading", "description": "A title for reporting outputs" }, "report_author": { "type": "string", - "default": "None", + "default": "null", "fa_icon": "fas fa-user-edit", "description": "An author for reporting outputs" }, + "report_contributors": { + "type": "string", + "description": "Semicolon-separated string of contributor info that should be listed in the report.", + "help_text": "List here names, roles, affiliations, contact info etc. of contributors to your project. Entries of different contributors are separated by semicolons, linebreaks within a contributor are separated by \n. The first line of each contributor will be bold in the report. 
E.g.: 'Jane Doe\nDirector of Institute of Microbiology\nUniversity of Smallville;John Smith\nPhD student\nUniversity of Smallville'" + }, "report_description": { "type": "string", - "default": "None", + "default": "null", "fa_icon": "fas fa-feather", "description": "A description for reporting outputs" }, @@ -963,6 +1070,12 @@ "type": "boolean", "default": true, "description": "Whether to generate a scree plot in the report" + }, + "report_round_digits": { + "type": "integer", + "default": 4, + "description": "To how many digits should numeric output in different modules be rounded? If -1, will not round.", + "help_text": "This affects output from the following modules (both their tabular output and their result sections in the report): proteus, gprofiler2." } }, "required": ["report_file", "logo_file", "css_file"] @@ -1201,9 +1314,15 @@ { "$ref": "#/definitions/gsea" }, + { + "$ref": "#/definitions/gprofiler2" + }, { "$ref": "#/definitions/shiny_app_settings" }, + { + "$ref": "#/definitions/gene_set_options" + }, { "$ref": "#/definitions/reporting_options" }, diff --git a/pyproject.toml b/pyproject.toml index 0d62beb6..56110621 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,15 @@ -# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff. # Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. 
-[tool.black] +[tool.ruff] line-length = 120 -target_version = ["py37", "py38", "py39", "py310"] +target-version = "py38" +cache-dir = "~/.cache/ruff" -[tool.isort] -profile = "black" -known_first_party = ["nf_core"] -multi_line_output = 3 +[tool.ruff.lint] +select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] + +[tool.ruff.lint.isort] +known-first-party = ["nf_core"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] diff --git a/subworkflows/local/utils_nfcore_differentialabundance_pipeline/main.nf b/subworkflows/local/utils_nfcore_differentialabundance_pipeline/main.nf new file mode 100644 index 00000000..07b557ed --- /dev/null +++ b/subworkflows/local/utils_nfcore_differentialabundance_pipeline/main.nf @@ -0,0 +1,228 @@ +// +// Subworkflow with functionality specific to the nf-core/differentialabundance pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE 
PIPELINE +======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + + emit: + versions = ch_versions +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // 
path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + genomeExistsError() +} + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (metas, fastqs) = input[1..2] + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect{ it.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + } + + return [ metas[0], fastqs ] +} +// +// Get attribute from genome config file e.g. 
fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // TODO nf-core: Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // TODO nf-core: Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", + "
<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>" + ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" + meta["nodoi_text"] = meta.manifest_map.doi ? "": "
<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>" + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 00000000..ac31f28f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit 
= true +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. 
+ def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 00000000..e5c3a0a8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. 
+ pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..68718e4f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. 
Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..e3f0baf4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..ca964ce8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert 
workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 00000000..f8476112 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - 
subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 00000000..14558c39 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,446 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if -profile ends with a trailing comma and warn about positional arguments +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + def temp_doi_ref = "" + String[] manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + temp_doi_ref + "\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "

    $group

    \n" + summary_section += "
    \n" + for (param in group_params.keySet()) { + summary_section += "
    $param
    ${group_params.get(param) ?: 'N/A'}
    \n" + } + summary_section += "
    \n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? 
'' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? 
'' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) 
misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(monochrome_logs) + if (email_address) { + try { + if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + Map colors = 
logColours(monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } +} + +// +// Construct and send a notification to a web server as JSON e.g. Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the 
JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 00000000..d08d2434 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..1dc317f8 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test 
@@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git 
a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..1037232c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + 
"green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": "\u001b[0;94m", + "ipurple": 
"\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..8940d32d --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 00000000..859d1030 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config 
b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 00000000..ac8523c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 00000000..2585b65d --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 'plugin/nf-validation' + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + + take: + print_help // boolean: print help + workflow_command // string: default command used to run pipeline + pre_help_text // string: string to be printed before help
text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug "Using schema file: ${schema_filename}" + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if needed + // + if (print_help) { + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params){ + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml new file mode 100644 index 00000000..3d4a6b04 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. 
"nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 00000000..5784a33f --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + 
schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('pre-help-text') 
} }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 00000000..7626c1c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". 
pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + 
"description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml new file mode 100644 index 00000000..60b1cfff --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/workflows/differentialabundance.nf b/workflows/differentialabundance.nf index 9b96ba11..020c0747 100644 --- a/workflows/differentialabundance.nf +++ b/workflows/differentialabundance.nf @@ -1,20 +1,9 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - PRINT PARAMS SUMMARY + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' - -def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) -def citation = '\n' + WorkflowMain.citation(workflow) + '\n' -def summary_params = paramsSummaryMap(workflow) - -// 
Print parameter summary log to screen -log.info logo + paramsSummaryLog(workflow) + citation - -WorkflowDifferentialabundance.initialise(params, log) - def checkPathParamList = [ params.input ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } @@ -29,11 +18,14 @@ if (params.study_type == 'affy_array'){ error("CEL files archive not specified!") } } else if (params.study_type == 'maxquant') { - + // Should the user have enabled --gsea_run, throw an error if (params.gsea_run) { error("Cannot run GSEA for maxquant data; please set --gsea_run to false.") } + if (params.gprofiler2_run){ + error("gprofiler2 pathway analysis is not yet possible with maxquant input data; please set --gprofiler2_run false and rerun pipeline!") + } if (!params.matrix) { error("Input matrix not specified!") } @@ -66,12 +58,24 @@ if (params.study_type == 'affy_array'){ // Check optional parameters if (params.transcript_length_matrix) { ch_transcript_lengths = Channel.of([ exp_meta, file(params.transcript_length_matrix, checkIfExists: true)]).first() } else { ch_transcript_lengths = [[],[]] } if (params.control_features) { ch_control_features = Channel.of([ exp_meta, file(params.control_features, checkIfExists: true)]).first() } else { ch_control_features = [[],[]] } -if (params.gsea_run) { - if (params.gsea_gene_sets){ - gene_sets_files = params.gsea_gene_sets.split(",") + +def run_gene_set_analysis = params.gsea_run || params.gprofiler2_run + +if (run_gene_set_analysis) { + if (params.gene_sets_files) { + gene_sets_files = params.gene_sets_files.split(",") ch_gene_sets = Channel.of(gene_sets_files).map { file(it, checkIfExists: true) } - } else { + if (params.gprofiler2_run && (!params.gprofiler2_token && !params.gprofiler2_organism) && gene_sets_files.size() > 1) { + error("gprofiler2 can currently only work with a single gene set file") + } + } else if (params.gsea_run) { error("GSEA activated but gene set file not specified!") + } else if 
(params.gprofiler2_run) { + if (!params.gprofiler2_token && !params.gprofiler2_organism) { + error("To run gprofiler2, please provide a run token, GMT file or organism!") + } + } else { + ch_gene_sets = [] // For methods that can run without gene sets } } @@ -94,6 +98,7 @@ citations_file = file(params.citations_file, checkIfExists: true) */ include { TABULAR_TO_GSEA_CHIP } from '../modules/local/tabular_to_gsea_chip' +include { FILTER_DIFFTABLE } from '../modules/local/filter_difftable' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -106,7 +111,6 @@ include { TABULAR_TO_GSEA_CHIP } from '../modules/local/tabular_to_gsea_chip' // include { GUNZIP as GUNZIP_GTF } from '../modules/nf-core/gunzip/main' include { UNTAR } from '../modules/nf-core/untar/main.nf' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' include { SHINYNGS_APP } from '../modules/nf-core/shinyngs/app/main' include { SHINYNGS_STATICEXPLORATORY as PLOT_EXPLORATORY } from '../modules/nf-core/shinyngs/staticexploratory/main' include { SHINYNGS_STATICDIFFERENTIAL as PLOT_DIFFERENTIAL } from '../modules/nf-core/shinyngs/staticdifferential/main' @@ -117,6 +121,7 @@ include { LIMMA_DIFFERENTIAL } from '../modules/n include { CUSTOM_MATRIXFILTER } from '../modules/nf-core/custom/matrixfilter/main' include { ATLASGENEANNOTATIONMANIPULATION_GTF2FEATUREANNOTATION as GTF_TO_TABLE } from '../modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/main' include { GSEA_GSEA } from '../modules/nf-core/gsea/gsea/main' +include { GPROFILER2_GOST } from '../modules/nf-core/gprofiler2/gost/main' include { CUSTOM_TABULARTOGSEAGCT } from '../modules/nf-core/custom/tabulartogseagct/main' include { CUSTOM_TABULARTOGSEACLS } from '../modules/nf-core/custom/tabulartogseacls/main' include { RMARKDOWNNOTEBOOK } from '../modules/nf-core/rmarkdownnotebook/main' @@ -125,6 +130,7 @@ include { AFFY_JUSTRMA as 
AFFY_JUSTRMA_NORM } from '../modules/n include { PROTEUS_READPROTEINGROUPS as PROTEUS } from '../modules/nf-core/proteus/readproteingroups/main' include { GEOQUERY_GETGEO } from '../modules/nf-core/geoquery/getgeo/main' include { ZIP as MAKE_REPORT_BUNDLE } from '../modules/nf-core/zip/main' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -132,9 +138,6 @@ include { ZIP as MAKE_REPORT_BUNDLE } from '../modules/n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] - workflow DIFFERENTIALABUNDANCE { // Set up some basic variables @@ -370,7 +373,7 @@ workflow DIFFERENTIALABUNDANCE { ch_control_features, ch_transcript_lengths ) - + // Let's make the simplifying assumption that the processed matrices from // the DESeq runs are the same across contrasts. We run the DESeq process // with matrices once for each contrast because DESeqDataSetFromMatrix() @@ -396,6 +399,16 @@ workflow DIFFERENTIALABUNDANCE { .map{ it.tail() } } + // We'll use a local module to filter the differential tables and create output files that contain only differential features + ch_logfc = Channel.value([ params.differential_fc_column, params.differential_min_fold_change ]) + ch_padj = Channel.value([ params.differential_qval_column, params.differential_max_qval ]) + + FILTER_DIFFTABLE( + ch_differential, + ch_logfc, + ch_padj + ) + // Run a gene set analysis where directed // Currently, we're letting GSEA work on the expression data. 
In future we @@ -454,6 +467,29 @@ workflow DIFFERENTIALABUNDANCE { .mix(GSEA_GSEA.out.versions) } + if (params.gprofiler2_run) { + + // For gprofiler2, use only features that are considered differential + ch_filtered_diff = FILTER_DIFFTABLE.out.filtered + + if (!params.gprofiler2_background_file) { + // If deactivated, use empty list as "background" + ch_background = [] + } else if (params.gprofiler2_background_file == "auto") { + // If auto, use input matrix as background + ch_background = CUSTOM_MATRIXFILTER.out.filtered.map{it.tail()}.first() + } else { + ch_background = Channel.from(file(params.gprofiler2_background_file, checkIfExists: true)) + } + + // For gprofiler2, token and organism have priority and will override a gene_sets file + + GPROFILER2_GOST( + ch_filtered_diff, + ch_gene_sets.first(), + ch_background + ) + } // The exploratory plots are made by coloring by every unique variable used // to define contrasts @@ -468,9 +504,9 @@ workflow DIFFERENTIALABUNDANCE { // normalised matrix, which can be passed through to downstream analysis if(params.study_type == "geo_soft_file") { - ch_mat = ch_norm + ch_mat = ch_norm }else{ - ch_mat = ch_raw.combine(ch_processed_matrices) + ch_mat = ch_raw.combine(ch_processed_matrices) } ch_all_matrices = VALIDATOR.out.sample_meta // meta, samples @@ -500,9 +536,12 @@ workflow DIFFERENTIALABUNDANCE { .mix(PLOT_EXPLORATORY.out.versions) .mix(PLOT_DIFFERENTIAL.out.versions) - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) + // + // Collate and save software versions + // + + ch_collated_versions = softwareVersionsToYAML(ch_versions) + .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'collated_versions.yml', sort: true, newLine: true) // Generate a list of files that will be used by the markdown report @@ -517,7 +556,7 @@ workflow DIFFERENTIALABUNDANCE { .map{ it.tail() } .map{it.flatten()} .combine(VALIDATOR.out.contrasts.map{it.tail()}) - 
.combine(CUSTOM_DUMPSOFTWAREVERSIONS.out.yml) + .combine(ch_collated_versions) .combine(ch_logo_file) .combine(ch_css_file) .combine(ch_citations_file) @@ -531,6 +570,14 @@ workflow DIFFERENTIALABUNDANCE { ) } + if (params.gprofiler2_run){ + ch_report_input_files = ch_report_input_files + .combine(GPROFILER2_GOST.out.plot_html.map{it[1]}.flatMap().toList()) + .combine(GPROFILER2_GOST.out.all_enrich.map{it[1]}.flatMap().toList()) + .combine(GPROFILER2_GOST.out.sub_enrich.map{it[1]}.flatMap().toList()) + GPROFILER2_GOST.out.plot_html + } + if (params.shinyngs_build_app){ // Make (and optionally deploy) the shinyngs app @@ -565,13 +612,23 @@ workflow DIFFERENTIALABUNDANCE { // Condition params reported on study type - def params_pattern = ~/^(report|study|observations|features|filtering|exploratory|differential|deseq2|gsea).*/ + def params_pattern = "report|gene_sets|study|observations|features|filtering|exploratory|differential" + if (params.study_type == 'rnaseq'){ + params_pattern += "|deseq2" + } if (params.study_type == 'affy_array' || params.study_type == 'geo_soft_file'){ - params_pattern = ~/^(report|study|observations|features|filtering|exploratory|differential|affy|limma|gsea).*/ + params_pattern += "|affy|limma" } if (params.study_type == 'maxquant'){ - params_pattern = ~/^(report|study|observations|features|filtering|exploratory|differential|proteus|affy|limma|gsea).*/ + params_pattern += "|proteus|limma" + } + if (params.gprofiler2_run){ + params_pattern += "|gprofiler2" } + if (params.gsea_run){ + params_pattern += "|gsea" + } + params_pattern = ~/(${params_pattern}).*/ ch_report_params = ch_report_input_files .map{ @@ -597,23 +654,6 @@ workflow DIFFERENTIALABUNDANCE { } -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow.onComplete { - if (params.email || params.email_on_fail) 
{ - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.dump_parameters(workflow, params) - NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) { - NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) - } -} - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END