diff --git a/CHANGELOG.md b/CHANGELOG.md index b2fc20aeeb..3cb97d37a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,10 +8,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a ### `Added` -- [#117](https://github.com/nf-core/sarek/pull/117) - Add `Trim Galore` possibilities to Sarek - [#76](https://github.com/nf-core/sarek/pull/76) - Add `GATK Spark` possibilities to Sarek - [#87](https://github.com/nf-core/sarek/pull/87) - Add `GATK BaseRecalibrator` plot to `MultiQC` report - [#115](https://github.com/nf-core/sarek/pull/115) - Add [@szilvajuhos](https://github.com/szilvajuhos) abstract for ESHG2020 +- [#117](https://github.com/nf-core/sarek/pull/117) - Add `Trim Galore` possibilities to Sarek ### `Changed` @@ -25,7 +25,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - [#113](https://github.com/nf-core/sarek/pull/113) - Move social preview image - [#120](https://github.com/nf-core/sarek/pull/120) - Sync TEMPLATE - [#121](https://github.com/nf-core/sarek/pull/121) - Update `MultiQC` to `1.8` -- [#126](https://github.com/nf-core/sarek/pull/126) - Update docs +- [#126](https://github.com/nf-core/sarek/pull/126), [#131](https://github.com/nf-core/sarek/pull/131) - Update docs +- [#131](https://github.com/nf-core/sarek/pull/131) - Use `nfcore/base:1.9` as base for containers +- [#131](https://github.com/nf-core/sarek/pull/131) - Update `Control-FREEC` to `11.5` +- [#131](https://github.com/nf-core/sarek/pull/131) - Update `FastQC` to `0.11.9` +- [#131](https://github.com/nf-core/sarek/pull/131) - Update `FreeBayes` to `1.3.2` +- [#131](https://github.com/nf-core/sarek/pull/131) - Update `Manta` to `1.6.0` +- [#131](https://github.com/nf-core/sarek/pull/131) - Update `Qualimap` to `2.2.2d` +- [#131](https://github.com/nf-core/sarek/pull/131) - Update `VEP` to `99.2` ### `Fixed` @@ -318,7 +325,7 @@ Initial release of `nf-core/sarek`, created with the [nf-core](http://nf-co.re/) - 
[#728](https://github.com/SciLifeLab/Sarek/pull/728) - `Strelka` Best Practices output is now prefixed with `StrelkaBP_` - [#728](https://github.com/SciLifeLab/Sarek/pull/728) - VCFs and Annotated VCFs are now ordered by Patient, then tools - [#732](https://github.com/SciLifeLab/Sarek/pull/732) - Merge `buildContainers.nf` and `buildReferences.nf` in `build.nf` -- [#732](https://github.com/SciLifeLab/Sarek/pull/732) - Reduce number of CPUs for `RunVEP` to `4` cf: [VEP docs](https://www.ensembl.org/info/docs/tools/vep/script/vep_other.html) +- [#732](https://github.com/SciLifeLab/Sarek/pull/732) - Reduce number of CPUs for `RunVEP` to `4` cf: [VEP docs](https://www.ensembl.org/info/docs/tools/vep/script/vep_other.html#faster) - [#732](https://github.com/SciLifeLab/Sarek/pull/732) - Update `VEP` from `95.1` to `95.2` ### `Removed` @@ -327,7 +334,7 @@ Initial release of `nf-core/sarek`, created with the [nf-core](http://nf-co.re/) - [#719](https://github.com/SciLifeLab/Sarek/pull/719) - `snpEff` base container is no longer used - [#721](https://github.com/SciLifeLab/Sarek/pull/721) - Remove `COSMIC` docs - [#728](https://github.com/SciLifeLab/Sarek/pull/728) - Remove `defineDirectoryMap()` -- [#732](https://github.com/SciLifeLab/Sarek/pull/732) - Remove `--database` option for VEP cf: [VEP docs](https://www.ensembl.org/info/docs/tools/vep/script/vep_other.html) +- [#732](https://github.com/SciLifeLab/Sarek/pull/732) - Remove `--database` option for VEP cf: [VEP docs](https://www.ensembl.org/info/docs/tools/vep/script/vep_other.html#faster) ### `Fixed` diff --git a/README.md b/README.md index 5fefc7d0df..8c73bcd706 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ It's built using [Nextflow](https://www.nextflow.io), a workflow tool to run tas It comes with docker containers making installation trivial and results highly reproducible.

- +

It's listed on the [Elixir - Tools and Data Services Registry](https://bio.tools/Sarek), [Dockstore](https://dockstore.org/workflows/github.com/nf-core/sarek) and [omicX - Bioinformatics tools](https://omictools.com/sarek-tool). @@ -73,7 +73,7 @@ The nf-core/sarek pipeline comes with documentation about the pipeline, found in ## Credits -Sarek was developed at the [National Genomics Infastructure](https://ngisweden.scilifelab.se) and [National Bioinformatics Infastructure Sweden](https://nbis.se) which are both platforms at [SciLifeLab](https://scilifelab.se), with the support of [The Swedish Childhood Tumor Biobank (Barntumörbanken)](ttps://ki.se/forskning/barntumorbanken-0). +Sarek was developed at the [National Genomics Infrastructure](https://ngisweden.scilifelab.se) and [National Bioinformatics Infrastructure Sweden](https://nbis.se) which are both platforms at [SciLifeLab](https://scilifelab.se), with the support of [The Swedish Childhood Tumor Biobank (Barntumörbanken)](https://ki.se/forskning/barntumorbanken). 
Main authors: @@ -109,7 +109,7 @@ For further information or help, don't hesitate to get in touch on [Slack](https ## Acknowledgements -[![Barntumörbanken](docs/images/BTB_logo.png)](https://ki.se/forskning/barntumorbanken-0) | [![SciLifeLab](docs/images/SciLifeLab_logo.png)](https://scilifelab.se) +[![Barntumörbanken](docs/images/BTB_logo.png)](https://ki.se/forskning/barntumorbanken) | [![SciLifeLab](docs/images/SciLifeLab_logo.png)](https://scilifelab.se) :-:|:-: [![National Genomics Infrastructure](docs/images/NGI_logo.png)](https://ngisweden.scilifelab.se/) | [![National Bioinformatics Infrastructure Sweden](docs/images/NBIS_logo.png)](https://nbis.se) diff --git a/containers/snpeff/Dockerfile b/containers/snpeff/Dockerfile index d08854d769..53ecdaf56f 100644 --- a/containers/snpeff/Dockerfile +++ b/containers/snpeff/Dockerfile @@ -1,13 +1,18 @@ -FROM nfcore/base:1.7 - +FROM nfcore/base:1.9 LABEL \ author="Maxime Garcia" \ description="snpEff image for use in nf-core/sarek" \ maintainer="maxime.garcia@scilifelab.se" +# Install the conda environment COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/sarek-snpeff-dev/bin:$PATH + +# Add conda installation dir to PATH (instead of doing 'conda activate') +ENV PATH /opt/conda/envs/nf-core-sarek-snpeff-dev/bin:$PATH + +# Dump the details of the installed packages to a file for posterity +RUN conda env export --name nf-core-sarek-snpeff-dev > nf-core-sarek-snpeff-dev.yml # Setup default ARG variables ARG GENOME=GRCh38 diff --git a/containers/snpeff/environment.yml b/containers/snpeff/environment.yml index e9b1f01e03..82ac3fe4f8 100644 --- a/containers/snpeff/environment.yml +++ b/containers/snpeff/environment.yml @@ -1,6 +1,6 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: sarek-snpeff-dev +name: nf-core-sarek-snpeff-dev channels: - conda-forge - bioconda diff --git 
a/containers/vep/Dockerfile b/containers/vep/Dockerfile index 695e5ef865..4bf89db555 100644 --- a/containers/vep/Dockerfile +++ b/containers/vep/Dockerfile @@ -1,13 +1,18 @@ -FROM nfcore/base:1.7 - +FROM nfcore/base:1.9 LABEL \ author="Maxime Garcia" \ description="VEP image for use in nf-core/sarek" \ maintainer="maxime.garcia@scilifelab.se" +# Install the conda environment COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/sarek-vep-dev/bin:$PATH + +# Add conda installation dir to PATH (instead of doing 'conda activate') +ENV PATH /opt/conda/envs/nf-core-sarek-vep-dev/bin:$PATH + +# Dump the details of the installed packages to a file for posterity +RUN conda env export --name nf-core-sarek-vep-dev > nf-core-sarek-vep-dev.yml # Setup default ARG variables ARG GENOME=GRCh38 @@ -19,7 +24,6 @@ RUN vep_install \ -a c \ -c .vep \ -s ${SPECIES} \ - -v ${VEP_VERSION} \ -y ${GENOME} \ --CACHE_VERSION ${VEP_VERSION} \ --CONVERT \ diff --git a/containers/vep/environment.yml b/containers/vep/environment.yml index d44a829ed8..a357210e74 100644 --- a/containers/vep/environment.yml +++ b/containers/vep/environment.yml @@ -1,11 +1,11 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: sarek-vep-dev +name: nf-core-sarek-vep-dev channels: - conda-forge - bioconda - defaults dependencies: - - ensembl-vep=95.2 + - ensembl-vep=99.2 - genesplicer=1.0 diff --git a/docs/containers.md b/docs/containers.md index ddf6de64e8..01ba9476f3 100644 --- a/docs/containers.md +++ b/docs/containers.md @@ -13,39 +13,41 @@ For annotation, the main container can be used, but the cache has to be download ### sarek [![sarek-docker status](https://img.shields.io/docker/automated/nfcore/sarek.svg)](https://hub.docker.com/r/nfcore/sarek) -- Based on `nfcore/base:latest` +- Based on `nfcore/base:1.9` - Contain **[ASCAT](https://github.com/Crick-CancerGenomics/ascat)** 2.5.2 - Contain 
**[AlleleCount](https://github.com/cancerit/alleleCount)** 4.0.2 - Contain **[BCFTools](https://github.com/samtools/bcftools)** 1.9 - Contain **[BWA](https://github.com/lh3/bwa)** 0.7.17 -- Contain **[Control-FREEC](https://github.com/BoevaLab/FREEC)** 11.4 -- Contain **[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/)** 0.11.8 -- Contain **[FreeBayes](https://github.com/ekg/freebayes)** 1.2.0 +- Contain **[Control-FREEC](https://github.com/BoevaLab/FREEC)** 11.5 +- Contain **[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/)** 0.11.9 +- Contain **[FreeBayes](https://github.com/ekg/freebayes)** 1.3.2 - Contain **[GATK4-spark](https://github.com/broadinstitute/gatk)** 4.1.4.1 - Contain **[GeneSplicer](https://ccb.jhu.edu/software/genesplicer/)** 1.0 - Contain **[HTSlib](https://github.com/samtools/htslib)** 1.9 -- Contain **[Manta](https://github.com/Illumina/manta)** 1.5.0 -- Contain **[MultiQC](https://github.com/ewels/MultiQC/)** 1.7 -- Contain **[Qualimap](http://qualimap.bioinfo.cipf.es)** 2.2.2b +- Contain **[Manta](https://github.com/Illumina/manta)** 1.6.0 +- Contain **[MultiQC](https://github.com/ewels/MultiQC/)** 1.8 +- Contain **[Qualimap](http://qualimap.bioinfo.cipf.es)** 2.2.2d - Contain **[samtools](https://github.com/samtools/samtools)** 1.9 - Contain **[snpEff](http://snpeff.sourceforge.net/)** 4.3.1t - Contain **[Strelka2](https://github.com/Illumina/strelka)** 2.9.10 - Contain **[TIDDIT](https://github.com/SciLifeLab/TIDDIT)** 2.7.1 -- Contain **[VCFanno](https://github.com/brentp/vcfanno)** 0.3.1 +- Contain **[pigz](https://zlib.net/pigz/)** 2.3.4 +- Contain **[Trim Galore](https://github.com/FelixKrueger/TrimGalore)** 0.6.5 +- Contain **[VCFanno](https://github.com/brentp/vcfanno)** 0.3.2 - Contain **[VCFtools](https://vcftools.github.io/index.html)** 0.1.16 -- Contain **[VEP](https://github.com/Ensembl/ensembl-vep)** 95.2 +- Contain **[VEP](https://github.com/Ensembl/ensembl-vep)** 99.2 ### sareksnpeff 
[![sareksnpeff-docker status](https://img.shields.io/docker/automated/nfcore/sareksnpeff.svg)](https://hub.docker.com/r/nfcore/sareksnpeff) -- Based on `nfcore/base:latest` +- Based on `nfcore/base:1.9` - Contain **[snpEff](http://snpeff.sourceforge.net/)** 4.3.1t - Contains cache for `GRCh37`, `GRCh38`, `GRCm38` or `CanFam3.1` ### sarekvep [![sarekvep-docker status](https://img.shields.io/docker/automated/nfcore/sarekvep.svg)](https://hub.docker.com/r/nfcore/sarekvep) -- Based on `nfcore/base:latest` +- Based on `nfcore/base:1.9` - Contain **[GeneSplicer](https://ccb.jhu.edu/software/genesplicer/)** 1.0 -- Contain **[VEP](https://github.com/Ensembl/ensembl-vep)** 95.2 +- Contain **[VEP](https://github.com/Ensembl/ensembl-vep)** 99.2 - Contain cache for `GRCh37`, `GRCh38`, `GRCm38` or `CanFam3.1` ## Building your own diff --git a/docs/usage.md b/docs/usage.md index c06a875743..653787c914 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -84,8 +84,8 @@ - [--awscli](#--awscli) - [Other command line parameters](#other-command-line-parameters) - [--outdir](#--outdir) -- [--publish_dir_mode](#--publish_dir_mode) -- [--publishDirMode](#--publishdirmode) + - [--publish_dir_mode](#--publish_dir_mode) + - [--publishDirMode](#--publishdirmode) - [--sequencing_center](#--sequencing_center) - [--email](#--email) - [--email_on_fail](#--email_on_fail) @@ -799,13 +799,13 @@ Please make sure to also set the `-w/--work-dir` and `--outdir` parameters to a The output directory where the results will be saved. Default: `results/` -## --publish_dir_mode +### --publish_dir_mode The file publishing method. Available: `symlink`, `rellink`, `link`, `copy`, `copyNoFollow`, `move` Default: `copy` -## --publishDirMode +### --publishDirMode > :warning: This params is deprecated -- it will be removed in a future release. 
> Please check: [`--publish_dir_mode`](#--publish_dir_mode) diff --git a/downloadcache.nf b/downloadcache.nf index 1a1048a7e8..f48ed2e34f 100644 --- a/downloadcache.nf +++ b/downloadcache.nf @@ -20,11 +20,11 @@ Usage: you're reading it DOWNLOAD CACHE: - nextflow run build.nf --download_cache [--snpEff_cache ] [--vep_cache ] + nextflow run build.nf --download_cache [--snpeff_cache ] [--vep_cache ] [--cadd_cache --cadd_version ] --download_cache Will download specified cache - --snpEff_cache + --snpeff_cache Specify path to snpEff cache If none, will use snpEff version specified in configuration Will use snpEff cache version for ${params.genome}: ${params.genomes[params.genome].snpeffDb} in igenomes configuration file: @@ -55,7 +55,7 @@ params.offline = null params.cadd_cache = null params.cadd_version = 'v1.5' params.genome = 'GRCh37' -params.snpEff_cache = null +params.snpeff_cache = null params.vep_cache = null ch_referencesFiles = Channel.empty() @@ -152,7 +152,7 @@ ${summary.collect { k,v -> "
$k
${v ?: ' "
$k
${v ?: 'N/A'}
" } .reduce { a, b -> return [a, b].join("\n ") } .map { x -> """ - id: '{{ cookiecutter.name_noslash }}-summary' + id: 'sarek-summary' description: " - this information is collected when the pipeline is started." - section_name: '{{ cookiecutter.name }} Workflow Summary' - section_href: 'https://github.com/{{ cookiecutter.name }}' + section_name: 'nf-core/sarek Workflow Summary' + section_href: 'https://github.com/nf-core/sarek' plot_type: 'html' data: |
@@ -2687,20 +2687,20 @@ process Ascat { script: gender = genderMap[idPatient] - ascat_purity=params.ascat_purity - ascat_ploidy=params.ascat_ploidy - if (params.ascat_purity && params.ascat_ploidy) - """ - for f in *BAF *LogR; do sed 's/chr//g' \$f > tmpFile; mv tmpFile \$f;done - Rscript ${workflow.projectDir}/bin/run_ascat.r --tumorbaf ${bafTumor} --tumorlogr ${logrTumor} --normalbaf ${bafNormal} --normallogr ${logrNormal} --tumorname ${idSampleTumor} --basedir ${baseDir} --gcfile ${acLociGC} --gender ${gender} --purity ${ascat_purity} --ploidy ${ascat_ploidy} - """ - else + purity_ploidy = (params.ascat_purity && params.ascat_ploidy) ? "--purity ${params.ascat_purity} --ploidy ${params.ascat_ploidy}" : "" """ - for f in *BAF *LogR; do sed 's/chr//g' \$f > tmpFile; mv tmpFile \$f;done - Rscript ${workflow.projectDir}/bin/run_ascat.r --tumorbaf ${bafTumor} --tumorlogr ${logrTumor} --normalbaf ${bafNormal} --normallogr ${logrNormal} --tumorname ${idSampleTumor} --basedir ${baseDir} --gcfile ${acLociGC} --gender ${gender} + for f in *BAF *LogR; do sed 's/chr//g' \$f > tmpFile; mv tmpFile \$f;done + Rscript ${workflow.projectDir}/bin/run_ascat.r \ + --tumorbaf ${bafTumor} \ + --tumorlogr ${logrTumor} \ + --normalbaf ${bafNormal} \ + --normallogr ${logrNormal} \ + --tumorname ${idSampleTumor} \ + --basedir ${baseDir} \ + --gcfile ${acLociGC} \ + --gender ${gender} \ + ${purity_ploidy} """ - - } ascatOut.dump(tag:'ASCAT') @@ -3323,9 +3323,9 @@ process Output_documentation { workflow.onComplete { // Set up the e-mail variables - def subject = "[{{ cookiecutter.name }}] Successful: $workflow.runName" + def subject = "[nf-core/sarek] Successful: $workflow.runName" if (!workflow.success) { - subject = "[{{ cookiecutter.name }}] FAILED: $workflow.runName" + subject = "[nf-core/sarek] FAILED: $workflow.runName" } def email_fields = [:] email_fields['version'] = workflow.manifest.version @@ -3355,12 +3355,12 @@ workflow.onComplete { if (workflow.success) { mqc_report = 
ch_multiqc_report.getVal() if (mqc_report.getClass() == ArrayList) { - log.warn "[{{ cookiecutter.name }}] Found multiple reports from process 'multiqc', will use only one" + log.warn "[nf-core/sarek] Found multiple reports from process 'multiqc', will use only one" mqc_report = mqc_report[0] } } } catch (all) { - log.warn "[{{ cookiecutter.name }}] Could not attach MultiQC report to summary email" + log.warn "[nf-core/sarek] Could not attach MultiQC report to summary email" } // Check if we are only sending emails on failure @@ -3392,11 +3392,11 @@ workflow.onComplete { if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "[{{ cookiecutter.name }}] Sent summary e-mail to $email_address (sendmail)" + log.info "[nf-core/sarek] Sent summary e-mail to $email_address (sendmail)" } catch (all) { // Catch failures and try with plaintext [ 'mail', '-s', subject, email_address ].execute() << email_txt - log.info "[{{ cookiecutter.name }}] Sent summary e-mail to $email_address (mail)" + log.info "[nf-core/sarek] Sent summary e-mail to $email_address (mail)" } } @@ -3422,10 +3422,10 @@ workflow.onComplete { } if (workflow.success) { - log.info "-${c_purple}[{{ cookiecutter.name }}]${c_green} Pipeline completed successfully${c_reset}-" + log.info "-${c_purple}[nf-core/sarek]${c_green} Pipeline completed successfully${c_reset}-" } else { checkHostname() - log.info "-${c_purple}[{{ cookiecutter.name }}]${c_red} Pipeline completed with errors${c_reset}-" + log.info "-${c_purple}[nf-core/sarek]${c_red} Pipeline completed with errors${c_reset}-" } } @@ -3667,10 +3667,13 @@ def extractFastq(tsvFile) { def idRun = row[4] def file1 = returnFile(row[5]) def file2 = "null" - if (hasExtension(file1, "fastq.gz") || hasExtension(file1, "fq.gz")) { + if (hasExtension(file1, "fastq.gz") || hasExtension(file1, "fq.gz") || hasExtension(file1, "fastq") || 
hasExtension(file1, "fq")) { checkNumberOfItem(row, 7) file2 = returnFile(row[6]) - if (!hasExtension(file2, "fastq.gz") && !hasExtension(file2, "fq.gz")) exit 1, "File: ${file2} has the wrong extension. See --help for more information" + if (!hasExtension(file2, "fastq.gz") && !hasExtension(file2, "fq.gz") && !hasExtension(file2, "fastq") && !hasExtension(file2, "fq")) exit 1, "File: ${file2} has the wrong extension. See --help for more information" + if (hasExtension(file1, "fastq") || hasExtension(file1, "fq") || hasExtension(file2, "fastq") || hasExtension(file2, "fq")) { + exit 1, "We do recommend to use gziped fastq file to help you reduce your data footprint." + } } else if (hasExtension(file1, "bam")) checkNumberOfItem(row, 6) else "No recognisable extention for input file: ${file1}" diff --git a/nextflow.config b/nextflow.config index 7a44b22844..1ea5dd160a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -33,6 +33,7 @@ params { trim_nextseq = 0 save_trimmed = false split_fastq = null // Fastq files will not be split by default + single_end = false // No single end // Preprocessing markdup_java_options = '"-Xms4000m -Xmx7g"' // Established values for markDuplicate memory consumption, see https://github.com/SciLifeLab/Sarek/pull/689 for details @@ -50,7 +51,6 @@ params { pon_index = false // No default PON index for GATK Mutect2 / Sentieon TNscope target_bed = false // No default TargetBED file for targeted sequencing - // Annotation annotate_tools = null // Only with --step annotate annotation_cache = null // Annotation cache disabled