diff --git a/CHANGELOG.md b/CHANGELOG.md index 3306980fe6..b16d6abfb2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - [#115](https://github.com/nf-core/sarek/pull/115) - Add [@szilvajuhos](https://github.com/szilvajuhos) abstract for ESHG2020 - [#117](https://github.com/nf-core/sarek/pull/117) - Add `Trim Galore` possibilities to Sarek - [#141](https://github.com/nf-core/sarek/pull/141) - Add containers for `WBcel235` -- GitHub actions AWS megatests +- [#150](https://github.com/nf-core/sarek/pull/150), [#151](https://github.com/nf-core/sarek/pull/151), [#154](https://github.com/nf-core/sarek/pull/154) - Add AWS mega test GitHub Actions ### `Changed` @@ -40,6 +40,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - [#141](https://github.com/nf-core/sarek/pull/141) - Update `VEP` databases to `99` - [#143](https://github.com/nf-core/sarek/pull/143) - Revert `snpEff` cache version to `75` for `GRCh37` - [#143](https://github.com/nf-core/sarek/pull/143) - Revert `snpEff` cache version to `86` for `GRCh38` +- [#152](https://github.com/nf-core/sarek/pull/152) - Update docs ### `Fixed` @@ -52,9 +53,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - [#129](https://github.com/nf-core/sarek/pull/129) - Apply @drpatelh fix for `mardown_to_html.py` compatibility with Python 2 - [#129](https://github.com/nf-core/sarek/pull/129) - Removed `Python` `3.7.3` from conda environment due to incompatibility - [#129](https://github.com/nf-core/sarek/pull/129) - Change ascii characters that were not supported from the `output.md` docs +- [#140](https://github.com/nf-core/sarek/pull/140) - Fix extra T/N combinations for `ASCAT` cf [#136](https://github.com/nf-core/sarek/issues/136) - [#141](https://github.com/nf-core/sarek/pull/141) - Fix `download_cache.nf` script to download cache for `snpEff` and `VEP` - 
[#143](https://github.com/nf-core/sarek/pull/143) - Fix annotation CI testing with `snpEff` and `VEP` - [#144](https://github.com/nf-core/sarek/pull/144) - Fix CircleCI for building `VEP` containers +- [#146](https://github.com/nf-core/sarek/pull/146) - Fix `--no_intervals` for `Mutect2` cf [#135](https://github.com/nf-core/sarek/issues/135) ### `Deprecated` @@ -93,6 +96,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - [#107](https://github.com/nf-core/sarek/pull/107) - `--singleCPUMem` is now removed, use `--single_cpu_mem` instead - [#107](https://github.com/nf-core/sarek/pull/107) - `--snpeffDb` is now removed, use `--snpeff_db` instead - [#107](https://github.com/nf-core/sarek/pull/107) - `--vepCacheVersion` is now removed, use `--vep_cache_version` instead +- [#152](https://github.com/nf-core/sarek/pull/152) - Removed `Jenkinsfile` ## [2.5.2] - Jåkkåtjkaskajekna diff --git a/Jenkinsfile b/Jenkinsfile deleted file mode 100644 index 2a26fd3d5f..0000000000 --- a/Jenkinsfile +++ /dev/null @@ -1,63 +0,0 @@ -pipeline { - agent any - - environment { - JENKINS_API = credentials('api') - } - - stages { - stage('Docker setup') { - steps { - sh "docker pull nfcore/sarek:dev" - sh "docker tag nfcore/sarek:dev nfcore/sarek:dev" - sh "docker pull nfcore/sareksnpeff:dev.GRCh37" - sh "docker tag nfcore/sareksnpeff:dev.GRCh37 nfcore/sareksnpeff:dev.GRCh37" - sh "docker pull nfcore/sarekvep:dev.GRCh37" - sh "docker tag nfcore/sarekvep:dev.GRCh37 nfcore/sarekvep:dev.GRCh37" - } - } - stage('Annotation') { - steps { - sh "nextflow run . -profile test_annotation,kraken --verbose --tools snpeff,vep,merge" - } - } - stage('Germline') { - steps { - sh "rm -rf data/" - sh "git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git data" - sh "nextflow run . -profile test,kraken --input data/testdata/tiny/normal" - sh "nextflow run . -profile test,kraken --input=false --step recalibrate -resume" - sh "nextflow run . 
-profile test,kraken --input=false --step variantCalling" - sh "rm -rf data/" - } - } - stage('Minimal') { - steps { - sh "nextflow run . -profile test,kraken --skipQC all --verbose --genome smallerGRCh37 --no_intervals --tools Manta,mpileup,Strelka" - sh "nextflow run . -profile test,kraken --skipQC all --verbose --genome smallerGRCh37 --tools Manta,mpileup,Strelka" - sh "nextflow run . -profile test,kraken --skipQC all --verbose --genome minimalGRCh37 --no_intervals --tools Manta,mpileup,Strelka" - sh "nextflow run . -profile test,kraken --skipQC all --verbose --genome minimalGRCh37 --tools Manta,mpileup,Strelka" - } - } - stage('Profile') { - steps { - sh "nextflow run . -profile test_splitfastq,kraken --verbose" - sh "nextflow run . -profile test_targeted,kraken --verbose" - } - } - stage('Tools') { - steps { - sh "nextflow run . -profile test_tool,kraken --verbose --tools Haplotypecaller,Freebayes,Manta,mpileup,Mutect2,Strelka" - } - } - } - - post { - failure { - script { - def response = sh(script: "curl -u ${JENKINS_API_USR}:${JENKINS_API_PSW} ${BUILD_URL}/consoleText", returnStdout: true).trim().replace('\n', '
') - def comment = pullRequest.comment("## :rotating_light: Buil log output:
${response}
") - } - } - } -} diff --git a/README.md b/README.md index 8c73bcd706..4fa84f9aea 100644 --- a/README.md +++ b/README.md @@ -82,19 +82,28 @@ Main authors: Helpful contributors: -* [Johannes Alneberg](https://github.com/alneberg) -* [Phil Ewels](https://github.com/ewels) +* [Adrian Lärkeryd](https://github.com/adrlar) +* [Alexander Peltzer](https://github.com/apeltzer) +* [Chela James](https://github.com/chelauk) +* [Francesco L](https://github.com/nibscles) +* [FriederikeHanssen](https://github.com/FriederikeHanssen) +* [Gisela Gabernet](https://github.com/ggabernet) * [Jesper Eisfeldt](https://github.com/J35P312) +* [Johannes Alneberg](https://github.com/alneberg) +* [KochTobi](https://github.com/KochTobi) +* [Lucia Conde](https://github.com/lconde-ucl) * [Malin Larsson](https://github.com/malinlarsson) * [Marcel Martin](https://github.com/marcelm) -* [Alexander Peltzer](https://github.com/apeltzer) * [Nilesh Tawari](https://github.com/nilesh-tawari) +* [Phil Ewels](https://github.com/ewels) +* [Sabrina Krakau](https://github.com/skrakau) +* [Sebastian-D](https://github.com/Sebastian-D) +* [Winni Kretzschmar](https://github.com/winni2k) * [arontommi](https://github.com/arontommi) * [bjornnystedt](https://github.com/bjornnystedt) +* [cgpu](https://github.com/cgpu) * [gulfshores](https://github.com/gulfshores) -* [KochTobi](https://github.com/KochTobi) * [pallolason](https://github.com/pallolason) -* [Sebastian-D](https://github.com/Sebastian-D) * [silviamorins](https://github.com/silviamorins) ## Contributions & Support @@ -116,7 +125,7 @@ For further information or help, don't hesitate to get in touch on [Slack](https ## Citation If you use `nf-core/sarek` for your analysis, please cite the `Sarek` pre-print as follows: -> Garcia M, Juhos S, Larsson M et al. **Sarek: A portable workflow for whole-genome sequencing analysis of germline and somatic variants [version 1; peer review: awaiting peer review].**. *F1000Research* 2020, 9:63. 
[doi: 10.12688/f1000research.16665.1](https://f1000research.com/articles/9-63/v1). +> Garcia M, Juhos S, Larsson M et al. **Sarek: A portable workflow for whole-genome sequencing analysis of germline and somatic variants [version 1; peer review: 1 approved]** *F1000Research* 2020, 9:63 [doi: 10.12688/f1000research.16665.1](https://f1000research.com/articles/9-63/v1). You can cite the sarek zenodo record for a specific version using the following [doi: 10.5281/zenodo.3476426](https://zenodo.org/badge/latestdoi/184289291) diff --git a/containers/vep/Dockerfile b/containers/vep/Dockerfile index 4bf89db555..e771071cb0 100644 --- a/containers/vep/Dockerfile +++ b/containers/vep/Dockerfile @@ -17,7 +17,7 @@ RUN conda env export --name nf-core-sarek-vep-dev > nf-core-sarek-vep-dev.yml # Setup default ARG variables ARG GENOME=GRCh38 ARG SPECIES=homo_sapiens -ARG VEP_VERSION=95 +ARG VEP_VERSION=99 # Download Genome RUN vep_install \ diff --git a/docs/annotation.md b/docs/annotation.md index 8500a6d59e..48521947cb 100644 --- a/docs/annotation.md +++ b/docs/annotation.md @@ -16,16 +16,26 @@ With Sarek, annotation is done using `snpEff`, `VEP`, or even both consecutively VCF produced by Sarek will be annotated if `snpEff` or `VEP` are specified with the `--tools` command. As Sarek will use `bgzip` and `tabix` to compress and index VCF files annotated, it expects VCF files to be sorted. -In these examples, all command lines will be launched starting with step `annotate`. +In these examples, all command lines will be launched starting with `--step annotate`. It can of course be started directly from any other step instead. ## Using genome specific containers -Sarek has already designed containers with `snpEff` and `VEP` files for `GRCh37`, `GRCh38` and `GRCm38`. +Sarek has already designed containers with `snpEff` and `VEP` files for Human (`GRCh37`, `GRCh38`), Mouse (`GRCm38`), Dog (`CanFam3.1`) and Roundworm (`WBcel235`). Default settings will run using these containers. 
The main Sarek container has also `snpEff` and `VEP` installed, but without the cache files that can be downloaded separately. +## Download cache + +A Nextflow helper script has been designed to help download `snpEff` and `VEP` cache. +Such files are meant to be shared between multiple users, so this script is mainly meant for people administering servers, clusters and advanced users. + +```bash +nextflow run download_cache.nf --snpeff_cache --snpeff_db --genome +nextflow run download_cache.nf --vep_cache --species --vep_cache_version --genome +``` + ## Using downloaded cache Both `snpEff` and `VEP` enable usage of cache. @@ -36,8 +46,8 @@ The cache will only be used when `--annotation_cache` and cache directories are Example: ```bash -nextflow run nf-core/sarek --tools snpEff --step annotate --sample file.vcf.gz --snpEff_cache /Path/To/snpEffCache --annotation_cache -nextflow run nf-core/sarek --tools VEP --step annotate --sample file.vcf.gz --vep_cache /Path/To/vepCache --annotation_cache +nextflow run nf-core/sarek --tools snpEff --step annotate --sample file.vcf.gz --snpEff_cache --annotation_cache +nextflow run nf-core/sarek --tools VEP --step annotate --sample file.vcf.gz --vep_cache --annotation_cache ``` ## Using VEP CADD plugin @@ -52,10 +62,10 @@ Example: ```bash nextflow run nf-core/sarek --step annotate --tools VEP --sample file.vcf.gz --cadd_cache \ - --cadd_InDels /PathToCADD/InDels.tsv.gz \ - --cadd_InDels_tbi /PathToCADD/InDels.tsv.gz.tbi \ - --cadd_WG_SNVs /PathToCADD/whole_genome_SNVs.tsv.gz \ - --cadd_WG_SNVs_tbi /PathToCADD/whole_genome_SNVs.tsv.gz.tbi + --cadd_InDels \ + --cadd_InDels_tbi \ + --cadd_WG_SNVs \ + --cadd_WG_SNVs_tbi ``` ### Downloading CADD files @@ -64,7 +74,7 @@ An helper script has been designed to help downloading CADD files. Such files are meant to be share between multiple users, so this script is mainly meant for people administrating servers, clusters and advanced users. 
```bash -nextflow run downloadcache.nf --cadd_cache /Path/To/CADDcache --cadd_version --genome +nextflow run download_cache.nf --cadd_cache --cadd_version --genome ``` ## Using VEP GeneSplicer plugin diff --git a/download_cache.nf b/download_cache.nf index f5be2032cd..988b098d58 100644 --- a/download_cache.nf +++ b/download_cache.nf @@ -23,29 +23,27 @@ def helpMessage() { log.info nfcoreHeader() log.info""" -Usage: - --help - you're reading it - - nextflow run download_cache.nf [--snpeff_cache ] [--vep_cache ] - [--cadd_cache --cadd_version ] - --snpeff_cache - Specify path to snpEff cache - If none, will use snpEff version specified in configuration - Will use snpEff cache version for ${params.genome}: ${params.genomes[params.genome].snpeff_db} in igenomes configuration file: - Change with --genome or in configuration files - --vep_cache - Specify path to VEP cache - If none, will use VEP version specified in configuration - Will from th iGenomes configuration file for ${params.genome}: - VEP cache version: ${params.genomes[params.genome].vep_cache_version} - and species ${params.genomes[params.genome].species} - Change with --genome or in configuration files - --cadd_cache - Specify path to CADD cache - Will use CADD version specified - --cadd_version - Will specify which CADD version to download + Usage: + + The typical command for running the pipeline is as follows: + + nextflow run nf-core/sarek/download_cache.nf -profile docker --genome --help + [--snpeff_cache --snpeff_db_version ] + [--vep_cache --vep_cache_version --species ] + [--cadd_cache --cadd_version ] + + Options: + --help [bool] You're reading it + --snpeff_cache [file] Path to snpEff cache + --snpeff_db_version [str] snpEff DB version + Default: ${params.genomes[params.genome].snpeff_db} + --vep_cache [file] Path to VEP cache + --vep_cache_version [int] VEP cache version + Default: ${params.genomes[params.genome].vep_cache_version} + --species [str] Species + Default: 
${params.genomes[params.genome].species} + --cadd_cache [file] Path to CADD cache + --cadd_version [str] CADD version to download """.stripIndent() } @@ -216,20 +214,18 @@ process DownloadCADD { cadd_files = cadd_files.dump(tag: 'cadd_files') -def nfcoreHeader(){ +def nfcoreHeader() { // Log colors ANSI codes - c_reset = params.monochrome_logs ? '' : "\033[0m"; - c_dim = params.monochrome_logs ? '' : "\033[2m"; c_black = params.monochrome_logs ? '' : "\033[0;30m"; - c_red = params.monochrome_logs ? '' : "\033[0;31m"; - c_green = params.monochrome_logs ? '' : "\033[0;32m"; - c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; c_blue = params.monochrome_logs ? '' : "\033[0;34m"; + c_dim = params.monochrome_logs ? '' : "\033[2m"; + c_green = params.monochrome_logs ? '' : "\033[0;32m"; c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; + c_reset = params.monochrome_logs ? '' : "\033[0m"; c_white = params.monochrome_logs ? '' : "\033[0;37m"; + c_yellow = params.monochrome_logs ? 
'' : "\033[0;33m"; - return """ ${c_dim}----------------------------------------------------${c_reset} + return """ -${c_dim}--------------------------------------------------${c_reset}- ${c_green},--.${c_black}/${c_green},-.${c_reset} ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} @@ -243,7 +239,7 @@ def nfcoreHeader(){ ${c_white}`${c_green}|${c_reset}____${c_green}\\${c_reset}´${c_reset} ${c_purple} nf-core/sarek v${workflow.manifest.version}${c_reset} - ${c_dim}----------------------------------------------------${c_reset} + -${c_dim}--------------------------------------------------${c_reset}- """.stripIndent() } diff --git a/main.nf b/main.nf index 556fb1f29e..08e9876c22 100644 --- a/main.nf +++ b/main.nf @@ -44,6 +44,7 @@ def helpMessage() { Default: Mapping Options: + --help [bool] You're reading it --no_gvcf [bool] No g.vcf output from HaplotypeCaller --no_strelka_bp [bool] Will not use Manta candidateSmallIndels for Strelka as Best Practice --no_intervals [bool] Disable usage of intervals @@ -57,20 +58,24 @@ def helpMessage() { snpEff, VEP, merge Default: None --skip_qc [str] Specify which QC tools to skip when running Sarek - Available: all, bamQC, BaseRecalibrator, BCFtools, Documentation, FastQC, MultiQC, samtools, vcftools, versions + Available: all, bamQC, BaseRecalibrator, BCFtools, Documentation + FastQC, MultiQC, samtools, vcftools, versions Default: None --annotate_tools [str] Specify from which tools Sarek will look for VCF files to annotate, only for step annotate Available: HaplotypeCaller, Manta, Mutect2, Strelka, TIDDIT Default: None --sentieon [bool] If sentieon is available, will enable it for preprocessing, and variant calling - Adds the following tools for --tools: DNAseq, DNAscope and TNscope + Adds the following options for --tools: DNAseq, DNAscope and TNscope --annotation_cache [bool] Enable the use of cache for annotation, to be used with --snpeff_cache 
and/or --vep_cache --snpeff_cache [file] Specity the path to snpEff cache, to be used with --annotation_cache --vep_cache [file] Specity the path to VEP cache, to be used with --annotation_cache - --pon [file] Panel-of-normals VCF (bgzipped, indexed). See: https://software.broadinstitute.org/gatk/documentation/tooldocs/current/org_broadinstitute_hellbender_tools_walkers_mutect_CreateSomaticPanelOfNormals.php + --pon [file] Panel-of-normals VCF (bgzipped, indexed) + See: https://software.broadinstitute.org/gatk/documentation/tooldocs/current/org_broadinstitute_hellbender_tools_walkers_mutect_CreateSomaticPanelOfNormals.php --pon_index [file] Index of pon panel-of-normals VCF - --ascat_ploidy [int] Use this parameter together with to overwrite default behavior from ASCAT regarding ploidy. Note: Also requires that --ascat_purity is set. - --ascat_purity [int] Use this parameter to overwrite default behavior from ASCAT regarding purity. Note: Also requires that --ascat_ploidy is set. + --ascat_ploidy [int] Use this parameter to overwrite default behavior from ASCAT regarding ploidy + Requires that --ascat_purity is set + --ascat_purity [int] Use this parameter to overwrite default behavior from ASCAT regarding purity + Requires that --ascat_ploidy is set Trimming: --trim_fastq [bool] Run Trim Galore @@ -105,7 +110,7 @@ def helpMessage() { If none provided, will be generated automatically if a knownIndels file is provided --species [str] Species for VEP --snpeff_db [str] snpEff Database version - --vep_cache_version [str] VEP Cache version + --vep_cache_version [int] VEP Cache version Other options: --outdir [file] The output directory where the results will be saved @@ -120,9 +125,9 @@ def helpMessage() { -name [str] Name for the pipeline run. 
If not specified, Nextflow will automatically generate a random mnemonic AWSBatch options: - --awsqueue [str] The AWSBatch JobQueue that needs to be set when running on AWSBatch - --awsregion [str] The AWS Region for your AWS Batch job to run on - --awscli [str] Path to the AWS CLI tool + --awsqueue [str] The AWSBatch JobQueue that needs to be set when running on AWSBatch + --awsregion [str] The AWS Region for your AWS Batch job to run on + --awscli [str] Path to the AWS CLI tool """.stripIndent() } @@ -471,7 +476,6 @@ ch_cadd_wg_snvs_tbi = params.cadd_wg_snvs_tbi ? Channel.value(file(params.cadd_w ch_pon = params.pon ? Channel.value(file(params.pon)) : "null" ch_target_bed = params.target_bed ? Channel.value(file(params.target_bed)) : "null" - /* ================================================================================ PRINTING SUMMARY