nf-core · maxulysse · Sep 22, 2020 · Sep 21, 2020 · Sep 21, 2020 · Sep 21, 2020
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -1,30 +1,50 @@
 name: nf-core CI
-# This workflow is triggered on pushes and PRs to the repository.
-# It runs the pipeline with the minimal test dataset to check that it completes without any syntax errors.
-on: [push, pull_request]
+# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors
+on:
+  push:
+    branches:
+      - dev
+  pull_request:
+  release:
+    types: [published]
 
 jobs:
   test:
+    name: Run workflow tests
+    # Only run on push if this is the nf-core dev branch (merged PRs)
+    if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/sarek') }}
+    runs-on: ubuntu-latest
     env:
       NXF_VER: ${{ matrix.nxf_ver }}
       NXF_ANSI_LOG: false
-    runs-on: ubuntu-latest
     strategy:
       matrix:
         # Nextflow versions: check pipeline minimum and current latest
-        nxf_ver: ['20.07.0-RC1', '']
+        nxf_ver: ['20.07.1', '']
     steps:
-      - uses: actions/checkout@v2
+      - name: Check out pipeline code
+        uses: actions/checkout@v2
+      - name: Check if Dockerfile or Conda environment changed
+        uses: technote-space/get-diff-action@v1
+        with:
+          PREFIX_FILTER: |
+            Dockerfile
+            environment.yml
+      - name: Build new docker image
+        if: env.GIT_DIFF
+        run: docker build --no-cache . -t nfcore/sarek:dev
+      - name: Pull docker image
+        if: ${{ !env.GIT_DIFF }}
+        run: |
+          docker pull nfcore/sarek:dev
+          docker tag nfcore/sarek:dev nfcore/sarek:dev
       - name: Install Nextflow
         run: |
           wget -qO- get.nextflow.io | bash
           sudo mv nextflow /usr/local/bin/
-      - name: Pull docker image
+      - name: Run pipeline with test data
         run: |
-          docker pull nfcore/sarek:dev
-          docker tag nfcore/sarek:dev nfcore/sarek:dev
-      - name: Run test
-        run: nextflow run ${GITHUB_WORKSPACE} -profile test,docker
+          nextflow run ${GITHUB_WORKSPACE} -profile test,docker
 
   annotation:
     env:
@@ -42,7 +62,7 @@ jobs:
           sudo mv nextflow /usr/local/bin/
         env:
           # Only check Nextflow pipeline minimum version
-          NXF_VER: '20.07.0-RC1'
+          NXF_VER: '20.07.1'
       - name: Pull docker image
         run: |
           docker pull nfcore/sarek:dev
@@ -65,7 +85,7 @@ jobs:
           sudo mv nextflow /usr/local/bin/
         env:
           # Only check Nextflow pipeline minimum version
-          NXF_VER: '20.07.0-RC1'
+          NXF_VER: '20.07.1'
       - name: Pull docker image
         run: docker pull nfcore/sarek:dev
       - name: Get test data
@@ -93,7 +113,7 @@ jobs:
           sudo mv nextflow /usr/local/bin/
         env:
           # Only check Nextflow pipeline minimum version
-          NXF_VER: '20.07.0-RC1'
+          NXF_VER: '20.07.1'
       - name: Pull docker image
         run: docker pull nfcore/sarek:dev
       - name: Run test for minimal genomes
@@ -105,7 +125,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        profile: [test_split_fastq, test_targeted, test_trimming, test_no_gatk_spark]
+        profile: [test_split_fastq, test_targeted, test_trimming, test_no_gatk_spark, test_umi_tso, test_umi_qiaseq]
     steps:
       - uses: actions/checkout@v2
       - name: Install Nextflow
@@ -114,12 +134,33 @@ jobs:
           sudo mv nextflow /usr/local/bin/
         env:
           # Only check Nextflow pipeline minimum version
-          NXF_VER: '20.07.0-RC1'
+          NXF_VER: '20.07.1'
       - name: Pull docker image
         run: docker pull nfcore/sarek:dev
       - name: Run ${{ matrix.profile }} test
         run: nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.profile }},docker
 
+  aligner:  # Runs the `test` profile once per aligner listed in the matrix below
+    env:
+      NXF_ANSI_LOG: false
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        aligner: [bwa-mem, bwa-mem2]  # each value is forwarded to `--aligner` in the last step
+    steps:
+      - uses: actions/checkout@v2
+      - name: Install Nextflow
+        run: |
+          wget -qO- get.nextflow.io | bash
+          sudo mv nextflow /usr/local/bin/
+        env:
+          # Only check Nextflow pipeline minimum version
+          NXF_VER: '20.07.1'
+      - name: Pull docker image
+        run: docker pull nfcore/sarek:dev
+      - name: Run ${{ matrix.aligner }} test  # this job's matrix defines `aligner`, not `profile`
+        run: nextflow run ${GITHUB_WORKSPACE} -profile test,docker --aligner ${{ matrix.aligner }}
+
   tools:
     env:
       NXF_ANSI_LOG: false
@@ -145,7 +186,7 @@ jobs:
           sudo mv nextflow /usr/local/bin/
         env:
           # Only check Nextflow pipeline minimum version
-          NXF_VER: '20.07.0-RC1'
+          NXF_VER: '20.07.1'
       - name: Pull docker image
         run: docker pull nfcore/sarek:dev
       - name: Run ${{ matrix.tool }} test

diff --git a/README.md b/README.md
@@ -2,19 +2,17 @@
 
 > **An open-source analysis pipeline to detect germline or somatic variants from whole genome or targeted sequencing**
 
-[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.07.0--RC1-brightgreen.svg)](https://www.nextflow.io/)
+[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.07.1-brightgreen.svg)](https://www.nextflow.io/)
 [![nf-core](https://img.shields.io/badge/nf--core-pipeline-brightgreen.svg)](https://nf-co.re/)
 [![DOI](https://zenodo.org/badge/184289291.svg)](https://zenodo.org/badge/latestdoi/184289291)
 
 [![GitHub Actions CI status](https://github.com/nf-core/sarek/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/sarek/actions?query=workflow%3A%22nf-core+CI%22)
 [![GitHub Actions Linting status](https://github.com/nf-core/sarek/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/sarek/actions?query=workflow%3A%22nf-core+linting%22)
 [![CircleCi build status](https://img.shields.io/circleci/project/github/nf-core/sarek?logo=circleci)](https://circleci.com/gh/nf-core/sarek/)
 
-[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/)
-[![Docker Container available](https://img.shields.io/docker/automated/nfcore/sarek.svg)](https://hub.docker.com/r/nfcore/sarek/)
-[![Install with Singularity](https://img.shields.io/badge/use%20with-singularity-purple.svg)](https://www.sylabs.io/docs/)
-
-[![Join us on Slack](https://img.shields.io/badge/slack-nfcore/sarek-blue.svg)](https://nfcore.slack.com/channels/sarek)
+[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](https://bioconda.github.io/)
+[![Docker](https://img.shields.io/docker/automated/nfcore/sarek.svg)](https://hub.docker.com/r/nfcore/sarek)
+[![Get help on Slack](https://img.shields.io/badge/slack-nf--core%20%23sarek-4A154B?logo=slack)](https://nfcore.slack.com/channels/sarek)
 
 ## Introduction
 
@@ -33,49 +31,31 @@ It's listed on [Elixir - Tools and Data Services Registry](https://bio.tools/Sar
 
 ## Quick Start
 
-i. Install [`Nextflow`](https://nf-co.re/usage/installation)
+1. Install [`Nextflow`](https://nf-co.re/usage/installation)
 
-ii. Install either [`Docker`](https://docs.docker.com/engine/installation/) or [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) for full pipeline reproducibility (please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))
+2. Install either [`Docker`](https://docs.docker.com/engine/installation/) or [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_
 
-iii. Download the pipeline and test it on a minimal dataset with a single command
+3. Download the pipeline and test it on a minimal dataset with a single command:
 
-```bash
-nextflow run nf-core/sarek -profile test,<docker/singularity/conda/institute>
-```
+    ```bash
+    nextflow run nf-core/sarek -profile test,<docker/singularity/conda/institute>
+    ```
 
-> Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute.
-> If so, you can simply use `-profile <institute>` in your command.
-> This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.
+    > Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute.
+    > If so, you can simply use `-profile <institute>` in your command.
+    > This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.
 
-iv. Start running your own analysis!
+4. Start running your own analysis!
 
-```bash
-nextflow run nf-core/sarek -profile <docker/singularity/conda/institute> --input '*.tsv' --genome GRCh38
-```
+    ```bash
+    nextflow run nf-core/sarek -profile <docker/singularity/conda/institute> --input '*.tsv' --genome GRCh38
+    ```
 
 See [usage docs](docs/usage.md) for all of the available options when running the pipeline.
 
 ## Documentation
 
-The nf-core/sarek pipeline comes with documentation about the pipeline, found in the `docs/` directory:
-
-1. [Installation](https://nf-co.re/usage/installation)
-2. Pipeline configuration
-    * [Local installation](https://nf-co.re/usage/local_installation)
-    * [Adding your own system config](https://nf-co.re/usage/adding_own_config)
-    * [Install on a secure cluster](docs/install_bianca.md)
-    * [Reference genomes](https://nf-co.re/usage/reference_genomes)
-    * [Extra documentation on reference](docs/reference.md)
-3. [Running the pipeline](docs/usage.md)
-    * [Examples](docs/use_cases.md)
-    * [Input files documentation](docs/input.md)
-    * [Documentation about containers](docs/containers.md)
-4. [Output and how to interpret the results](docs/output.md)
-    * [Extra documentation on variant calling](docs/variant_calling.md)
-    * [Complementary information about ASCAT](docs/ascat.md)
-    * [Complementary information about Sentieon](docs/sentieon.md)
-    * [Extra documentation on annotation](docs/annotation.md)
-5. [Troubleshooting](https://nf-co.re/usage/troubleshooting)
+The nf-core/sarek pipeline comes with documentation about the pipeline which you can read at [https://nf-co.re/sarek/docs](https://nf-co.re/sarek/docs) or find in the [`docs/` directory](docs).
 
 ## Credits
 
@@ -135,7 +115,7 @@ For further information or help, don't hesitate to get in touch on [Slack](https
 ## Citation
 
 If you use `nf-core/sarek` for your analysis, please cite the `Sarek` article as follows:
-> Garcia M, Juhos S, Larsson M et al. **Sarek: A portable workflow for whole-genome sequencing analysis of germline and somatic variants [version 1; peer review: 2 approved]** *F1000Research* 2020, 9:63 [doi: 10.12688/f1000research.16665.1](http://dx.doi.org/10.12688/f1000research.16665.1).
+> Garcia M, Juhos S, Larsson M et al. **Sarek: A portable workflow for whole-genome sequencing analysis of germline and somatic variants [version 2; peer review: 2 approved]** *F1000Research* 2020, 9:63 [doi: 10.12688/f1000research.16665.2](http://dx.doi.org/10.12688/f1000research.16665.2).
 
 You can cite the sarek zenodo record for a specific version using the following [doi: 10.5281/zenodo.3476426](https://zenodo.org/badge/latestdoi/184289291)
 

diff --git a/conf/modules.config b/conf/modules.config
@@ -18,6 +18,20 @@ params {
             publish_dir      = "trimgalore"
             publish_results  = "all"
         }
+        'bwa_index' {
+            args             = ""
+            suffix           = ""
+            publish_dir      = "genome/bwa_index"
+            publish_results  = "all"
+        }
+        'bwa_mem' {
+            args             = "-K 100000000 -M"
+            args2            = ""
+            extra            = ""
+            suffix           = ""
+            publish_dir      = ""
+            publish_results  = "all"
+        }
         'bwamem2_index' {
             args             = ""
             suffix           = ""

diff --git a/docs/README.md b/docs/README.md
@@ -1,21 +1,10 @@
 # nf-core/sarek: Documentation
 
-The nf-core/sarek documentation is split into the following files:
+The nf-core/sarek documentation is split into the following pages:
 
-1. [Installation](https://nf-co.re/usage/installation)
-2. Pipeline configuration
-    * [Local installation](https://nf-co.re/usage/local_installation)
-    * [Adding your own system config](https://nf-co.re/usage/adding_own_config)
-    * [Install on a secure cluster](install_bianca.md)
-    * [Reference genomes](https://nf-co.re/usage/reference_genomes)
-    * [Extra documentation on reference](reference.md)
-3. [Running the pipeline](usage.md)
-    * [Examples](use_cases.md)
-    * [Input files documentation](input.md)
-    * [Documentation about containers](containers.md)
-4. [Output and how to interpret the results](output.md)
-    * [Extra documentation on variant calling](variant_calling.md)
-    * [Complementary information about ASCAT](ascat.md)
-    * [Complementary information about Sentieon](sentieon.md)
-    * [Extra documentation on annotation](annotation.md)
-5. [Troubleshooting](https://nf-co.re/usage/troubleshooting)
+- [Usage](usage.md)
+  - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags.
+- [Output](output.md)
+  - An overview of the different results produced by the pipeline and how to interpret them.
+
+You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re)
diff --git a/docs/abstracts/2020-10-VCBS.md b/docs/abstracts/2020-10-VCBS.md
@@ -0,0 +1,36 @@
+# Victorian Cancer Bioinformatics Symposium - online, 2020-10-23
+
+## Sarek, a reproducible and portable workflow for analysis of matching tumor-normal NGS data
+
+Maxime Garcia [1], Szilveszter Juhos [1], Teresita Díaz de Ståhl [1], Markus Mayrhofer [2], Johanna Sandgren [1], Björn Nystedt [2], Monica Nistér [1]
+
+[1] Dept. of Oncology Pathology, The Swedish Childhood Tumor Biobank (Barntumörbanken, BTB); Karolinska Institutet
+[2] Dept. of Cell and Molecular Biology; National Bioinformatics Infrastructure Sweden, Science for Life Laboratory; Uppsala University
+
+### Introduction
+
+High throughput sequencing for precision medicine is a routine method.
+Numerous tools have to be used, and analysis is time consuming.
+We propose Sarek, an open-source container-based bioinformatics workflow for germline or tumor/normal pairs (can include matched relapses), written in Nextflow, to process WGS, whole-exome or gene-panel samples.
+
+### Methods
+
+Sarek is part of nf-core, a collection of high quality peer-reviewed workflows; supported environments are Docker, Singularity and Conda, enabling version tracking and reproducibility.
+It is designed with flexible environments in mind: local fat node, HTC cluster or cloud environment like AWS.
+Several model organism references are available (including Human GRCh37 and GRCh38).
+Sarek is based on GATK best practices to prepare short-read data.
+The pipeline then reports germline and somatic SNVs and SVs (HaplotypeCaller, Strelka, Mutect2, Manta and TIDDIT).
+CNVs, purity and ploidy are estimated with ASCAT and Control-FREEC.
+At the end of the analysis the resulting VCF files can be annotated by SNPEff and/or VEP to facilitate further downstream processing.
+Furthermore, a broad set of QC metrics is reported as a final step of the workflow with MultiQC.
+Additional software can be included as new modules.
+
+### Results
+
+From FASTQs to annotated VCFs it takes four days for a paired 90X/90X WGS-sample on a 48-core node, with the complete set of tools.
+Processing can be sped up with the optional use of Sentieon (C).
+Sarek is used in production at the National Genomics Infrastructure Sweden for germline and cancer samples for the Swedish Childhood Tumor Biobank and other research groups.
+
+### Conclusion
+
+Sarek is an easy-to-use tool for germline or cancer NGS samples, to be downloaded from [nf-co.re/sarek](https://nf-co.re/sarek) under MIT license.