diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index be4b5d033a..722b3b4850 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,9 +40,12 @@ jobs: - "gatk4_spark" - "haplotypecaller" - "manta" + - "markduplicates" - "mutect2" - "msisensorpro" # - 'save_bam_mapped' + - "prepare_recalibration" + - "recalibrate" - "variantcalling_channel" - "skip_markduplicates" - "strelka" diff --git a/CHANGELOG.md b/CHANGELOG.md index 12dde941b8..4830720024 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#512](https://github.com/nf-core/sarek/pull/512), [#531](https://github.com/nf-core/sarek/pull/531), [#537](https://github.com/nf-core/sarek/pull/537) - Subway map for pipeline - [#522](https://github.com/nf-core/sarek/pull/522) - Add QC for vcf files & MultiQC - [#533](https://github.com/nf-core/sarek/pull/533) - Add param `--only_paired_variant_calling` to allow skipping of germline variantcalling for paired samples +- [#536](https://github.com/nf-core/sarek/pull/536) - Add `--step markduplicates` to start from duplicate marking, `--step prepare_recalibration` now ONLY starts at process `BaseRecalibrator` & adding `bam` and `cram` input support for `--step` `markduplicates`, `prepare_recalibration`, `recalibrate`, and `variant_calling` - [#538](https://github.com/nf-core/sarek/pull/538) - Add param `--seq_platform`, default: `ILLUMINA` ### Changed diff --git a/conf/modules.config b/conf/modules.config index a47053d427..fedaf85b45 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -91,7 +91,7 @@ process { } withName: 'TABIX_DBSNP' { - ext.when = { !params.dbsnp_tbi && params.dbsnp && (params.step == "mapping" || params.step == "prepare_recalibration") || params.tools && (params.tools.contains('controlfreec') || params.tools.contains('haplotypecaller') || params.tools.contains('mutect2')) } + ext.when = { !params.dbsnp_tbi && 
params.dbsnp && (params.step == "mapping" || params.step == "markduplicates" || params.step == "prepare_recalibration") || params.tools && (params.tools.contains('controlfreec') || params.tools.contains('haplotypecaller') || params.tools.contains('mutect2')) } publishDir = [ enabled: params.save_reference, mode: params.publish_dir_mode, @@ -111,7 +111,7 @@ process { } withName: 'TABIX_KNOWN_INDELS' { - ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == 'prepare_recalibration') } + ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration') } publishDir = [ enabled: params.save_reference, mode: params.publish_dir_mode, @@ -294,6 +294,32 @@ process { // MARKDUPLICATES + withName: 'SAMTOOLS_CRAMTOBAM'{ + ext.args = "-b" + } + + withName: 'SAMTOOLS_BAMTOCRAM.*' { + // BAM provided for step Markduplicates either run through MD or Convert -> then saved as md.cram + // BAM files provided for step prepare_recal are converted and run through BQSR -> then saved as md.cram + // BAM files provided for step recal are converted and run through BQSR II -> then saved as md.cram + ext.args = "-C" + ext.prefix = { "${meta.id}.md" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/${meta.id}/markduplicates" }, + pattern: "*{cram,crai}" + ] + } + + withName: 'SAMTOOLS_BAMTOCRAM_VARIANTCALLING' { + ext.prefix = { "${meta.id}.recal" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/${meta.id}/recalibrated" }, + pattern: "*{cram,crai}" + ] + } + withName: 'GATK4_ESTIMATELIBRARYCOMPLEXITY|GATK4_MARKDUPLICATES' { ext.prefix = { "${meta.id}.md" } publishDir = [ @@ -325,15 +351,6 @@ process { ext.when = { !(params.skip_tools && params.skip_tools.contains('markduplicates')) } } - withName: 'SAMTOOLS_BAMTOCRAM' { - ext.prefix = { "${meta.id}.md" 
} - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/${meta.id}/markduplicates" }, - pattern: "*{cram,crai}" - ] - } - withName: 'INDEX_MARKDUPLICATES' { publishDir = [ mode: params.publish_dir_mode, @@ -408,17 +425,6 @@ process { ] } - withName: 'QUALIMAP_BAMQC' { - ext.args = '--paint-chromosome-limits --genome-gc-distr HUMAN -skip-duplicated --skip-dup-mode 0 -outformat HTML' - ext.prefix = { "${meta.id}.mapped" } - ext.when = { !(params.skip_tools && params.skip_tools.contains('bamqc')) } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reports/qualimap/${meta.id}" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: 'SAMTOOLS_STATS' { ext.when = { !(params.skip_tools && params.skip_tools.contains('samtools')) } publishDir = [ @@ -439,7 +445,7 @@ process { withName: 'QUALIMAP_BAMQCCRAM' { ext.args = '--paint-chromosome-limits --genome-gc-distr HUMAN -skip-duplicated --skip-dup-mode 0 -outformat HTML' - ext.prefix = { "${meta.id}.recal" } + ext.prefix = { "${meta.id}.mapped" } ext.when = { !(params.skip_tools && params.skip_tools.contains('bamqc')) } publishDir = [ mode: params.publish_dir_mode, @@ -448,6 +454,10 @@ process { ] } + withName: 'NFCORE_SAREK:SAREK:CRAM_QC:QUALIMAP_BAMQCCRAM' { + ext.prefix = { "${meta.id}.recal" } + } + withName: 'NFCORE_SAREK:SAREK:CRAM_QC:SAMTOOLS_STATS' { ext.when = { !(params.skip_tools && params.skip_tools.contains('samtools')) } publishDir = [ diff --git a/conf/test.config b/conf/test.config index 43010f4096..5e6a864bdc 100644 --- a/conf/test.config +++ b/conf/test.config @@ -55,13 +55,36 @@ profiles { pair { params.input = "${baseDir}/tests/csv/3.0/fastq_pair.csv" } - prepare_recalibration { - params.input = "${baseDir}/tests/csv/3.0/mapped_single.csv" + markduplicates_bam { + params.input = "${baseDir}/tests/csv/3.0/mapped_single_bam.csv" + params.step = 'markduplicates' + } + markduplicates_cram { + 
params.input = "${baseDir}/tests/csv/3.0/mapped_single_cram.csv" + params.step = 'markduplicates' + } + prepare_recalibration_bam { + params.input = "${baseDir}/tests/csv/3.0/mapped_single_bam.csv" + params.step = 'prepare_recalibration' + } + prepare_recalibration_cram { + params.input = "${baseDir}/tests/csv/3.0/mapped_single_cram.csv" params.step = 'prepare_recalibration' } + recalibrate_bam { + params.input = "${baseDir}/tests/csv/3.0/prepare_recalibration_single_bam.csv" + params.step = 'recalibrate' + } + recalibrate_cram { + params.input = "${baseDir}/tests/csv/3.0/prepare_recalibration_single_cram.csv" + params.step = 'recalibrate' + } save_bam_mapped { params.save_bam_mapped = true } + skip_bqsr { + params.skip_tools = "baserecalibrator" + } skip_markduplicates { params.skip_tools = "markduplicates" } @@ -70,8 +93,9 @@ profiles { params.save_split_fastqs = true } targeted { - params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/multi_intervals.bed" - params.wes = true + params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.multi_intervals.bed" + params.wes = true + params.nucleotides_per_second = 20 } tools { params.input = "${baseDir}/tests/csv/3.0/recalibrated.csv" @@ -85,7 +109,6 @@ profiles { params.wes = true params.genome = 'WBcel235' params.vep_genome = 'WBcel235' - //params.vep_cache = } tools_germline { params.input = "${baseDir}/tests/csv/3.0/recalibrated_germline.csv" diff --git a/docs/images/sarek_subway.png b/docs/images/sarek_subway.png index 5a6dfd2d1c..b1ce4b48bf 100644 Binary files a/docs/images/sarek_subway.png and b/docs/images/sarek_subway.png differ diff --git a/docs/images/sarek_subway.svg b/docs/images/sarek_subway.svg index e7f0fc9208..f1b41165c5 100644 --- a/docs/images/sarek_subway.svg +++ b/docs/images/sarek_subway.svg @@ -2,25 +2,48 @@ + inkscape:export-ydpi="90" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + 
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns="http://www.w3.org/2000/svg" + xmlns:svg="http://www.w3.org/2000/svg"> + + id="defs2"> @@ -203,7 +226,7 @@ @@ -218,7 +241,7 @@ @@ -233,7 +256,7 @@ @@ -248,7 +271,7 @@ @@ -263,7 +286,7 @@ @@ -278,7 +301,7 @@ @@ -299,7 +322,7 @@ @@ -314,7 +337,7 @@ @@ -688,13 +711,13 @@ id="d-5"> + d="M 0,266 H 1022 V 0 H 0 Z" /> + d="m 280.17,136.33 -21.5,-21.584 h 61 v 21.584 z" /> + d="M 0,266 H 1022 V 0 H 0 Z" /> - - - - - image/svg+xml - - - - - - - - - - - - + id="layer1" + transform="translate(-2.8132356,-46.639854)"> - - - - - - - - trimgalore - UMI - mapping - - - ubam - - - - - vcf - - - - vcf - + inkscape:export-ydpi="90"> + - - vcf - - - fastqc - fastqc - - - - - - + bam - - - seqkit split2 - + id="text886" + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:37.4999px;line-height:1.25;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro';white-space:pre;shape-inside:url(#rect888);fill:#000000;fill-opacity:1;stroke:none;stroke-width:3.54334" + x="-13.845469" + y="64.38546" /> - - fastq - + id="g1624-0-2-1-4-7-7-9" + transform="matrix(3.5433071,0,0,-3.5433071,-766.85003,1100.0056)" + style="stroke:#000000;stroke-width:1.00002;stroke-opacity:1" /> - - fastq + style="stroke:#cc3333;stroke-width:1.05835;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + transform="matrix(-2.5054965,-2.5054965,-2.5054965,2.5054965,1579.6094,169.55827)" + id="g4803"> + - - - ensemblvep - snpeff - - deeptools, samtools, qualimap - samtools, qualimap - bcftools, vcftools - multiqc - - - + vcf - - - convert - + id="text886-8" + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:37.4999px;line-height:1.25;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro';white-space:pre;shape-inside:url(#rect888);fill:#000000;fill-opacity:1;stroke:none;stroke-width:3.54334" + x="1499.5474" + y="-819.54309" /> - - cram - 
- - - - - - - - - markduplicates - variant calling - - Optionnal - Mandatory - Tumor-normal pair variant calling - Core workflow - Germline variant calling - Tumor only variant calling - - - - - - - - - - - - prepare recalibration - - applybqsr - - - - - - - - + id="g1624-0-2-1-4-7-7-9-5" + transform="matrix(3.5433071,0,0,-3.5433071,746.54284,216.07702)" + style="stroke:#000000;stroke-width:1.00002;stroke-opacity:1" /> + - - + style="fill:none;stroke:#24af63;stroke-width:3.75005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + d="m 437.78969,-539.69042 c 3.49369,-0.0379 7.47317,0.84402 9.94369,3.31454 l 5.80048,5.80048 c 2.47057,2.47052 6.45005,3.3524 9.94374,3.31454" + id="path6727" + sodipodi:nodetypes="cscc" /> + style="fill:none;stroke:#24af63;stroke-width:3.75005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + d="m 427.5197,-514.83128 c -3.49369,0.0379 -7.47317,-0.84402 -9.94369,-3.31454 l -5.80048,-5.80048 c -2.47057,-2.47052 -6.45005,-3.3524 -9.94374,-3.31454" + id="path6731" + sodipodi:nodetypes="cscc" /> + - - - - deepvariant - freebayes - haplotypecaller - manta - strelka2 - tiddit - mutect2 - ascat - msisensorpro - controlfreec - - + sodipodi:nodetypes="cccccc" + id="path6749" + d="m 127.34544,-595.69834 c 0,0 2.81249,-3e-5 7.50001,0 4.41941,0 9.37498,4.95557 9.37498,9.37499 v 47.81249 c 0,4.41942 4.9556,11.25001 9.37501,11.25001 l 9.37502,-10e-6" + style="display:inline;fill:none;stroke:#24af63;stroke-width:3.75005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" + inkscape:connector-curvature="0" /> + trimgalore + UMI + mapping + id="g8614" + transform="translate(-4.285391)" + style="stroke-width:1.00002" + inkscape:export-filename="/Users/monarchy/Projects/Coding/sarek/docs/images/sarek_subway.png" + inkscape:export-xdpi="90" + inkscape:export-ydpi="90"> + d="m 
112.97495,-602.18695 h -1.09489 v -5.28288 c 0,-0.0331 -0.005,-0.0661 -0.01,-0.0995 -0.002,-0.20978 -0.0685,-0.41582 -0.21209,-0.57907 l -8.79133,-10.04258 c -0.003,-0.004 -0.005,-0.004 -0.007,-0.006 -0.0524,-0.0585 -0.1135,-0.10718 -0.17807,-0.14931 -0.0191,-0.0126 -0.0384,-0.0235 -0.0584,-0.0349 -0.056,-0.0305 -0.11532,-0.0558 -0.17642,-0.0742 -0.0165,-0.005 -0.0314,-0.0113 -0.048,-0.0157 -0.0664,-0.0157 -0.13535,-0.0257 -0.20522,-0.0257 H 80.588017 c -0.986614,0 -1.788118,0.8024 -1.788118,1.78822 v 14.52123 h -1.094641 c -1.41137,0 -2.555766,1.14389 -2.555766,2.55576 v 13.29043 c 0,1.41088 1.144396,2.5555 2.555766,2.5555 h 1.094724 v 9.09772 c 0,0.98569 0.801504,1.78818 1.788117,1.78818 h 29.503921 c 0.9857,0 1.78812,-0.8024 1.78812,-1.78818 v -9.09772 h 1.09489 c 1.41095,0 2.5556,-1.14462 2.5556,-2.5555 v -13.29003 c -8e-5,-1.41179 -1.14473,-2.55576 -2.55568,-2.55576 z m -32.386929,-14.52133 h 20.711769 v 9.14861 c 0,0.49398 0.40075,0.89407 0.89405,0.89407 h 7.8981 v 4.47921 H 80.588021 Z m 29.504009,41.5363 H 80.588021 v -8.61368 h 29.503919 v 8.61368 z" + id="path6843" /> vcf - - - - vcf - - + id="rect6845" />ubam - vcf + style="fill:#ffffff;stroke-width:0.435688" /> - - - - - - - + style="display:inline;stroke-width:1.00002" + id="g9511-5" + transform="translate(-26.7589,-206.23598)"> + id="g9201-8" + transform="matrix(0.4356832,0,0,-0.4356832,740.01999,372.05432)" + style="display:inline;stroke-width:1.00002"> + id="path9189-6" + style="stroke-width:1.00002" /> cram vcf + + style="fill:#ffffff;stroke-width:1.00002" /> + transform="matrix(0.4356832,0,0,-0.4356832,737.58022,374.48441)" + id="g9215-4" + style="display:inline;stroke-width:1.00002"> + sodipodi:nodetypes="scsccccccccssscsssscsssscsscsccsscccccccccc" + style="stroke-width:1.00002" /> cram vcf + + id="path9213-0" /> + + + + vcf + - - - cram - - - deepvariant - freebayes - manta - strelka2 - - - - - - - - + + fastqc + fastqc + + sodipodi:nodetypes="cc" + id="path6901" + d="M 127.33708,-527.26085 H 
404.84546" + style="display:inline;fill:none;stroke:#24af63;stroke-width:3.75005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" + inkscape:connector-curvature="0" /> - - + style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3.75005;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" + d="m 125.13756,-522.57443 h 3.75 v 0.002 l 4.85596,-4.68933 -4.85596,-4.6875 h -3.75 z" + id="path1381" /> + + + bam/cram + + + 
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3.75005;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" + d="m 374.03143,-566.71979 v 3.75 h -0.002 l 4.68933,4.85596 4.6875,-4.85596 v -3.75 z" + id="path1474" /> + seqkit split2 + + + + fastq + + + + + fastq + + + + ensemblvep + snpeff + - - convert - - - - - - - - - - - - - - - - - - - - - - + sodipodi:nodetypes="cc" /> + deeptools, samtools, qualimap + samtools, qualimap + bcftools, vcftools + multiqc + + + vcf + + + style="display:inline;fill:none;stroke:#000000;stroke-width:3.75005;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" + d="m 918.08626,-519.99523 v -14.53124" + id="path2667" /> + convert + + + + bam/cram + + + + + + + + + bam/cram + + + + + - - + 
style="display:inline;fill:none;stroke:#24af63;stroke-width:3.75005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" + d="m 773.67705,-563.82334 v 36.5625" + id="path3233" + sodipodi:nodetypes="cc" /> + style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3.75005;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" + d="m 768.99064,-566.71979 v 3.75 h -0.002 l 4.68932,4.85596 4.6875,-4.85596 v -3.75 z" + id="path3235" /> - - + style="display:inline;fill:none;stroke:#24af63;stroke-width:3.75005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" + d="m 378.71785,-563.82334 v 36.5625" + id="path3262" + sodipodi:nodetypes="cc" /> - + + markduplicates + variant calling + + Optionnal + Mandatory + Tumor-normal pair variant calling 
+ Core workflow + Germline variant calling + Tumor only variant calling + - - + sodipodi:nodetypes="cc" /> + style="display:inline;fill:none;stroke:#377eb8;stroke-width:3.75005;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" + d="M 592.45472,-36.523743 H 570.42348" + id="path4258" /> - - - - - - - + sodipodi:nodetypes="cc" /> + sodipodi:nodetypes="cc" /> - - + style="display:inline;fill:none;stroke:#e41a1c;stroke-width:3.75005;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" + d="M 592.45472,31.991888 H 570.42348" + id="path4350" /> + + + + + + + prepare recalibration + + applybqsr + + + + + + + + + + + + + + + sodipodi:nodetypes="cc" /> - - + style="fill:none;stroke:#377eb8;stroke-width:3.75005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + d="m 407.11088,-294.58186 h -7.23225 c -3.59373,0 -6.99068,2.4402 -9.64304,4.9187 l -23.03546,21.52548 c -2.54117,2.59238 -6.04931,4.91871 -9.64304,4.91871 h -7.23225" + id="path1946" + sodipodi:nodetypes="csccsc" /> + deepvariant + freebayes + haplotypecaller + manta + strelka2 + tiddit + mutect2 + ascat + msisensorpro + controlfreec + style="fill:none;stroke:#e41a1c;stroke-width:3.75005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + d="m 802.64772,-259.46897 h -7.23225 c -3.59373,0 -6.99069,2.4402 -9.64306,4.9187 l -23.03544,21.52548 c -2.54119,2.59238 -6.04932,4.91871 -9.64305,4.91871 h -7.23226" + id="path1893" + sodipodi:nodetypes="csccsc" /> + + + + vcf + + + + + vcf + + + + + vcf + + + - - - + sodipodi:nodetypes="csccsc" + id="path1948" + d="m 802.64772,-263.21896 h -7.23225 c -3.59373,0 -6.99069,-2.4402 -9.64306,-4.9187 l -23.03544,-21.52548 c -2.54119,-2.59238 -6.04932,-4.91871 -9.64305,-4.91871 h -7.23226" + 
style="fill:none;stroke:#377eb8;stroke-width:3.75005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + inkscape:connector-curvature="0" /> + style="display:inline;fill:none;stroke:#377eb8;stroke-width:3.75005;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" + d="M 342.75995,-294.58186 H 746.82244" + id="path1897" + sodipodi:nodetypes="cc" /> - - + sodipodi:nodetypes="cc" /> + style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3.75005;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" + d="m 340.88495,-289.89544 h 3.75 v 0.002 l 4.85596,-4.68933 -4.85596,-4.6875 h -3.75 z" + id="path1391" /> - - + id="g5484" + transform="translate(0,-1.5893806)" + style="stroke-width:1.00002"> + id="g7261" + transform="translate(165.32085,-16.655779)" + 
style="stroke-width:1.00002"> + + + cram + + + + + cram + + + + + 3.0 + transform="matrix(1,0,0,-1,45.615932,3461.2163)" + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0.01%;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro';letter-spacing:0px;word-spacing:0px;fill:#ffffff;fill-opacity:1;stroke:none;stroke-width:1.00002px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + id="flowRoot2227" + xml:space="preserve">cram + + + deepvariant + freebayes + manta + strelka2 + + + + + + + - - + transform="translate(-143.83149,0.84066785)" + style="display:inline;stroke-width:1.00002" + id="g5553-0"> + id="path5265-4" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + d="m 575.52539,395.2832 v 22.44922 h 7.5 V 395.2832 Z" + style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:7.8741;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rende
ring:auto;enable-background:accumulate" /> + id="path5267-09" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + d="m 571.52539,397.2832 v 18.44922 h 4 V 397.2832 Z m 11.5,0 v 18.44922 h 4 V 397.2832 Z" + style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:4.00004;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" /> + transform="translate(-143.83149,34.253667)" + style="display:inline;stroke-width:1.00002" + id="g5553-2"> + id="path5265-5" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + d="m 575.52539,395.2832 v 22.44922 h 7.5 V 395.2832 Z" + 
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:7.8741;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" /> - + id="path5267-4" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + d="m 571.52539,397.2832 v 18.44922 h 4 V 397.2832 Z m 11.5,0 v 18.44922 h 4 V 397.2832 Z" + 
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:4.00004;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" /> + + convert + + + + + + + + + + + + + + + + + + + + + + + id="path5265-1" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + d="m 575.52539,395.2832 v 22.44922 h 7.5 V 395.2832 Z" + 
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:7.8741;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" /> + id="path5267-0" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + d="m 571.52539,397.2832 v 18.44922 h 4 V 397.2832 Z m 11.5,0 v 18.44922 h 4 V 397.2832 Z" + 
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:4.00004;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" /> + transform="translate(35.957934,34.253667)" + style="display:inline;stroke-width:1.00002" + id="g5553-42"> + id="path5265-22" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + d="m 575.52539,395.2832 v 22.44922 h 7.5 V 395.2832 Z" + 
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:7.8741;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" /> + id="path5267-05" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + d="m 571.52539,397.2832 v 18.44922 h 4 V 397.2832 Z m 11.5,0 v 18.44922 h 4 V 397.2832 Z" + 
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:4.00004;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" /> + + + id="path5265-29" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + d="m 575.52539,395.2832 v 22.44922 h 7.5 V 395.2832 Z" + 
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:7.8741;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" /> + id="path5267-02" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + d="m 571.52539,397.2832 v 18.44922 h 4 V 397.2832 Z m 11.5,0 v 18.44922 h 4 V 397.2832 Z" + 
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:4.00004;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" /> + + + id="path5265-3" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + d="m 575.52539,395.2832 v 22.44922 h 7.5 V 395.2832 Z" + 
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:7.8741;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" /> + id="path5267-8" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + d="m 571.52539,397.2832 v 18.44922 h 4 V 397.2832 Z m 11.5,0 v 18.44922 h 4 V 397.2832 Z" + 
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:4.00004;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" /> + + + sodipodi:nodetypes="ccccc" /> + 
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:4.00004;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" + d="m 571.52539,397.2832 v 18.44922 h 4 V 397.2832 Z m 11.5,0 v 18.44922 h 4 V 397.2832 Z" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + id="path5684" + inkscape:connector-curvature="0" /> + + transform="translate(-107.87364,34.253667)" + style="display:inline;stroke-width:1.00002" + id="g5553-49"> + id="path5265-93" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + d="m 575.52539,395.2832 v 22.44922 h 7.5 V 395.2832 Z" + 
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:7.8741;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" /> + id="path5267-60" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + d="m 571.52539,397.2832 v 18.44922 h 4 V 397.2832 Z m 11.5,0 v 18.44922 h 4 V 397.2832 Z" + 
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:4.00004;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" /> + + + id="path5265" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + d="m 575.52539,395.2832 v 22.44922 h 7.5 V 395.2832 Z" + 
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:7.8741;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" /> + id="path5267" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + d="m 571.52539,397.2832 v 18.44922 h 4 V 397.2832 Z m 11.5,0 v 18.44922 h 4 V 397.2832 Z" + 
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:4.00004;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" /> + + + id="path5265-2" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + d="m 575.52539,395.2832 v 22.44922 h 7.5 V 395.2832 Z" + 
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:7.8741;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" /> + + + transform="translate(-107.87364,0.84066785)" + style="display:inline;stroke-width:1.00002" + id="g5553-1"> + id="path5265-9" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + d="m 575.52539,395.2832 v 22.44922 h 7.5 V 395.2832 Z" + 
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:7.8741;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" /> + id="path5267-6" + transform="matrix(0.9375,0,0,0.9375,15.436598,-660.07976)" + d="m 571.52539,397.2832 v 18.44922 h 4 V 397.2832 Z m 11.5,0 v 18.44922 h 4 V 397.2832 Z" + 
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:4.00004;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" /> + + + style="display:inline;fill:none;stroke:#000000;stroke-width:3.75005;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" + d="m 368.37125,-265.22032 v -9.375" + id="path9094" + sodipodi:nodetypes="cc" /> + sodipodi:nodetypes="cc" + id="path9096" + d="m 368.37125,-261.47032 v -9.375" + style="display:inline;fill:none;stroke:#000000;stroke-width:3.75005;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" + inkscape:connector-curvature="0" /> - - + + + 3.0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Example analysis pathways + 
style="font-style:normal;font-weight:normal;font-size:30px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.937508" + xml:space="preserve">Example analysis pathways + diff --git a/docs/usage.md b/docs/usage.md index a1303503e6..83046fe2d7 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -126,11 +126,11 @@ patient1,XX,1,tumor_sample,lane_2,test2_L002.bam patient1,XX,1,relapse_sample,lane_1,test3_L001.bam ``` -#### Start with duplicate marking and/or preparing recalibration (`--step prepare_recalibration`) +#### Start with duplicate marking (`--step markduplicates`) ##### Duplicate Marking -For starting from duplicate marking, the `CSV` file must contain at least the columns `patient`, `sample`, `bam`, `bai`. +For starting from duplicate marking, the `CSV` file must contain at least the columns `patient`, `sample`, `bam`, `bai` or `patient`, `sample`, `cram`, `crai` Example: @@ -139,12 +139,22 @@ patient,sample,bam,bai patient1,test_sample,test_mapped.bam,test_mapped.bam.bai ``` +```console +patient,sample,cram,crai +patient1,test_sample,test_mapped.cram,test_mapped.cram.crai +``` + ##### Prepare Recalibration -For starting directly from preparing recalibration and skipping duplicate marking, the `CSV` file must contain at least the columns `patient`, `sample`, `cram`, `crai` with _non-recalibrated CRAM_ files. Additionally, the parameter `--skip_tools markduplicates` must be set. +For starting directly from preparing recalibration, the `CSV` file must contain at least the columns `patient`, `sample`, `bam`, `bai` or `patient`, `sample`, `cram`, `crai`. 
Example: +```console +patient,sample,bam,bai +patient1,test_sample,test_mapped.bam,test_mapped.bam.bai +``` + ```console patient,sample,cram,crai patient1,test_sample,test_mapped.cram,test_mapped.cram.crai @@ -172,10 +182,15 @@ patient1,XX,1,relapse_sample,test3_mapped.cram,test3_mapped.cram.crai #### Start with base quality recalibration (`--step recalibrate`) -For starting from base quality recalibration the `CSV` file must contain at least the columns `patient`, `sample`, `cram`, `crai`, `table` containing the paths to _non-recalibrated CRAM_ files and the associated recalibration table. +For starting from base quality recalibration the `CSV` file must contain at least the columns `patient`, `sample`, `bam`, `bai`, `table` or `patient`, `sample`, `cram`, `crai`, `table` containing the paths to _non-recalibrated CRAM/BAM_ files and the associated recalibration table. Example: +```console +patient,sample,bam,bai,table +patient1,test_sample,test_mapped.bam,test_mapped.bam.bai,test.table +``` + ```console patient,sample,cram,crai,table patient1,test_sample,test_mapped.cram,test_mapped.cram.crai,test.table @@ -196,10 +211,15 @@ patient1,XX,1,relapse_sample,test3_mapped.cram,test3_mapped.cram.crai,test3.tabl #### Start with variant calling (`--step variant_calling`) -For starting from the variant calling step, the `CSV` file must contain at least the columns `patient`, `sample`, `cram`, `crai`. +For starting from the variant calling step, the `CSV` file must contain at least the columns `patient`, `sample`, `bam`, `bai` or `patient`, `sample`, `cram`, `crai`. 
Example: +```console +patient,sample,bam,bai +patient1,test_sample,test_mapped.bam,test_mapped.bam.bai +``` + ```console patient,sample,cram,crai patient1,test_sample,test_mapped.cram,test_mapped.cram.crai diff --git a/modules.json b/modules.json index 651c22ec8a..0aaca9c814 100644 --- a/modules.json +++ b/modules.json @@ -174,12 +174,12 @@ "samtools/bam2fq": { "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" }, - "samtools/bamtocram": { - "git_sha": "569e07f0af74e2a6ea43fca61ae90bb762893461" - }, "samtools/collatefastq": { "git_sha": "705f8c9ac4dfdf07666e71abde28f267e2dfd5eb" }, + "samtools/convert": { + "git_sha": "da79396f066a96450d9cc9f115c17c9d738595fd" + }, "samtools/faidx": { "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" }, diff --git a/modules/nf-core/modules/samtools/bamtocram/main.nf b/modules/nf-core/modules/samtools/convert/main.nf similarity index 62% rename from modules/nf-core/modules/samtools/bamtocram/main.nf rename to modules/nf-core/modules/samtools/convert/main.nf index b49c308fb9..e0e7d725d7 100644 --- a/modules/nf-core/modules/samtools/bamtocram/main.nf +++ b/modules/nf-core/modules/samtools/convert/main.nf @@ -1,5 +1,4 @@ -//There is a -L option to only output alignments in interval, might be an option for exons/panel data? -process SAMTOOLS_BAMTOCRAM { +process SAMTOOLS_CONVERT { tag "$meta.id" label 'process_medium' @@ -14,8 +13,8 @@ process SAMTOOLS_BAMTOCRAM { path fai output: - tuple val(meta), path("*.cram"), path("*.crai"), emit: cram_crai - path "versions.yml" , emit: versions + tuple val(meta), path("*.{cram,bam}"), path("*.{crai,bai}") , emit: alignment_index + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -23,9 +22,17 @@ process SAMTOOLS_BAMTOCRAM { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def output_extension = input.getExtension() == "bam" ? 
"cram" : "bam" + """ - samtools view --threads ${task.cpus} --reference ${fasta} -C $args $input > ${prefix}.cram - samtools index -@${task.cpus} ${prefix}.cram + samtools view \\ + --threads ${task.cpus} \\ + --reference ${fasta} \\ + $args \\ + $input \\ + -o ${prefix}.${output_extension} + + samtools index -@${task.cpus} ${prefix}.${output_extension} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/bamtocram/meta.yml b/modules/nf-core/modules/samtools/convert/meta.yml similarity index 77% rename from modules/nf-core/modules/samtools/bamtocram/meta.yml rename to modules/nf-core/modules/samtools/convert/meta.yml index 037704c622..937b140332 100644 --- a/modules/nf-core/modules/samtools/bamtocram/meta.yml +++ b/modules/nf-core/modules/samtools/convert/meta.yml @@ -1,5 +1,5 @@ -name: samtools_bamtocram -description: filter/convert and then index CRAM file +name: samtools_convert +description: convert and then index CRAM -> BAM or BAM -> CRAM file keywords: - view - index @@ -23,12 +23,12 @@ input: e.g. [ id:'test', single_end:false ] - input: type: file - description: BAM/SAM file - pattern: "*.{bam,sam}" + description: BAM/CRAM file + pattern: "*.{bam,cram}" - index: type: file - description: BAM/SAM index file - pattern: "*.{bai,sai}" + description: BAM/CRAM index file + pattern: "*.{bai,crai}" - fasta: type: file description: Reference file to create the CRAM file @@ -39,10 +39,10 @@ output: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - cram_crai: + - alignment_index: type: file - description: filtered/converted CRAM file + index - pattern: "*{.cram,.crai}" + description: filtered/converted BAM/CRAM file + index + pattern: "*{.bam/cram,.bai/crai}" - version: type: file description: File containing software version diff --git a/nextflow_schema.json b/nextflow_schema.json index d9b208744d..e15981ca7a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -20,6 +20,7 @@ "help_text": "Only one step", "enum": [ "mapping", + "markduplicates", "prepare_recalibration", "recalibrate", "variant_calling", diff --git a/subworkflows/local/markduplicates_csv.nf b/subworkflows/local/markduplicates_csv.nf index e8aa4214e0..a3aec4d42c 100644 --- a/subworkflows/local/markduplicates_csv.nf +++ b/subworkflows/local/markduplicates_csv.nf @@ -15,7 +15,6 @@ workflow MARKDUPLICATES_CSV { status = meta.status cram = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.cram" crai = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.cram.crai" - table = "${params.outdir}/preprocessing/${sample}/recal_table/${sample}.recal.table" - ["markduplicates_${sample}.csv", "patient,gender,status,sample,cram,crai,table\n${patient},${gender},${status},${sample},${cram},${crai},${table}\n"] - }.collectFile(name: 'markduplicates.csv', keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/preprocessing/csv") + ["markduplicates_no_table_${sample}.csv", "patient,gender,status,sample,cram,crai\n${patient},${gender},${status},${sample},${cram},${crai}\n"] + }.collectFile(name: 'markduplicates_no_table.csv', keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/preprocessing/csv") } diff --git a/subworkflows/local/prepare_intervals.nf b/subworkflows/local/prepare_intervals.nf index 9d74894f0f..1673a2af53 100644 --- a/subworkflows/local/prepare_intervals.nf +++ b/subworkflows/local/prepare_intervals.nf @@ -37,7 +37,6 @@ workflow 
PREPARE_INTERVALS { ch_intervals_combined_bed_gz_tbi = Channel.fromPath(file("${params.outdir}/no_intervals.bed.{gz,gz.tbi}")) .collect() - ch_intervals_combined_bed_gz_tbi.view() } else if (params.step != 'annotate' && params.step != 'controlfreec') { diff --git a/subworkflows/local/prepare_recalibration_csv.nf b/subworkflows/local/prepare_recalibration_csv.nf index 7e79b658bf..fb45a349fa 100644 --- a/subworkflows/local/prepare_recalibration_csv.nf +++ b/subworkflows/local/prepare_recalibration_csv.nf @@ -4,17 +4,18 @@ workflow PREPARE_RECALIBRATION_CSV { take: - table_bqsr // channel: [mandatory] meta, table + cram_table_bqsr // channel: [mandatory] meta, cram, crai, table main: // Creating csv files to restart from this step - table_bqsr.collectFile(storeDir: "${params.outdir}/preprocessing/csv") { meta, table -> + cram_table_bqsr.collectFile(storeDir: "${params.outdir}/preprocessing/csv") { meta, cram, crai, table -> patient = meta.patient sample = meta.sample gender = meta.gender status = meta.status cram = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.cram" crai = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.cram.crai" - ["markduplicates_no_table_${sample}.csv", "patient,gender,status,sample,cram,crai\n${patient},${gender},${status},${sample},${cram},${crai}\n"] - }.collectFile(name: 'markduplicates_no_table.csv', keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/preprocessing/csv") + table = "${params.outdir}/preprocessing/${sample}/recal_table/${sample}.recal.table" + ["markduplicates_${sample}.csv", "patient,gender,status,sample,cram,crai,table\n${patient},${gender},${status},${sample},${cram},${crai},${table}\n"] + }.collectFile(name: 'markduplicates.csv', keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/preprocessing/csv") } diff --git a/subworkflows/nf-core/bam_to_cram.nf b/subworkflows/nf-core/bam_to_cram.nf index ec3801e5a4..3f1fe333cb 100644 --- 
a/subworkflows/nf-core/bam_to_cram.nf +++ b/subworkflows/nf-core/bam_to_cram.nf @@ -4,15 +4,17 @@ // For all modules here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include { DEEPTOOLS_BAMCOVERAGE } from '../../modules/nf-core/modules/deeptools/bamcoverage/main' -include { QUALIMAP_BAMQC } from '../../modules/nf-core/modules/qualimap/bamqc/main' -include { SAMTOOLS_BAMTOCRAM } from '../../modules/nf-core/modules/samtools/bamtocram/main' +include { DEEPTOOLS_BAMCOVERAGE } from '../../modules/nf-core/modules/deeptools/bamcoverage/main' +include { QUALIMAP_BAMQCCRAM } from '../../modules/nf-core/modules/qualimap/bamqccram/main' +include { SAMTOOLS_CONVERT as SAMTOOLS_BAMTOCRAM } from '../../modules/nf-core/modules/samtools/convert/main' +include { SAMTOOLS_STATS as SAMTOOLS_STATS_CRAM } from '../../modules/nf-core/modules/samtools/stats/main' workflow BAM_TO_CRAM { take: bam_indexed // channel: [mandatory] meta, bam, bai + cram_indexed fasta // channel: [mandatory] fasta - fai // channel: [mandatory] fai + fasta_fai // channel: [mandatory] fai intervals_combined_bed_gz_tbi // channel: [optional] intervals_bed.gz, intervals_bed.gz.tbi main: @@ -23,26 +25,29 @@ workflow BAM_TO_CRAM { bam_no_index = bam_indexed.map{ meta, bam, bai -> [meta, bam] } // Convert bam input to cram - SAMTOOLS_BAMTOCRAM(bam_indexed, fasta, fai) + SAMTOOLS_BAMTOCRAM(bam_indexed, fasta, fasta_fai) - // Reports on bam input - DEEPTOOLS_BAMCOVERAGE(bam_indexed) - QUALIMAP_BAMQC(bam_no_index, intervals_combined_bed_gz_tbi) + cram_indexed = Channel.empty().mix(cram_indexed,SAMTOOLS_BAMTOCRAM.out.alignment_index) - // Other reports run on cram + // Reports on cram + DEEPTOOLS_BAMCOVERAGE(cram_indexed) + QUALIMAP_BAMQCCRAM(cram_indexed, intervals_combined_bed_gz_tbi, fasta, fasta_fai) + SAMTOOLS_STATS_CRAM(cram_indexed, fasta) // Gather all reports generated qc_reports = qc_reports.mix(DEEPTOOLS_BAMCOVERAGE.out.bigwig) - qc_reports = 
qc_reports.mix(QUALIMAP_BAMQC.out.results) + qc_reports = qc_reports.mix(QUALIMAP_BAMQCCRAM.out.results) + qc_reports = qc_reports.mix(SAMTOOLS_STATS_CRAM.out.stats) // Gather versions of all tools used ch_versions = ch_versions.mix(DEEPTOOLS_BAMCOVERAGE.out.versions.first()) - ch_versions = ch_versions.mix(QUALIMAP_BAMQC.out.versions.first()) + ch_versions = ch_versions.mix(QUALIMAP_BAMQCCRAM.out.versions.first()) ch_versions = ch_versions.mix(SAMTOOLS_BAMTOCRAM.out.versions.first()) + ch_versions = ch_versions.mix(SAMTOOLS_STATS_CRAM.out.versions) emit: - cram = SAMTOOLS_BAMTOCRAM.out.cram_crai - qc = qc_reports + cram_converted = SAMTOOLS_BAMTOCRAM.out.alignment_index + qc = qc_reports versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/nf-core/gatk4/markduplicates/main.nf b/subworkflows/nf-core/gatk4/markduplicates/main.nf index 0d7393cb3a..f211a8b54c 100644 --- a/subworkflows/nf-core/gatk4/markduplicates/main.nf +++ b/subworkflows/nf-core/gatk4/markduplicates/main.nf @@ -22,7 +22,7 @@ workflow MARKDUPLICATES { GATK4_MARKDUPLICATES(bam) // Convert output to cram - BAM_TO_CRAM(GATK4_MARKDUPLICATES.out.bam.join(GATK4_MARKDUPLICATES.out.bai), fasta, fasta_fai, intervals_combined_bed_gz_tbi) + BAM_TO_CRAM(GATK4_MARKDUPLICATES.out.bam.join(GATK4_MARKDUPLICATES.out.bai), Channel.empty(), fasta, fasta_fai, intervals_combined_bed_gz_tbi) // Gather all reports generated qc_reports = qc_reports.mix(GATK4_MARKDUPLICATES.out.metrics) @@ -32,7 +32,7 @@ workflow MARKDUPLICATES { ch_versions = ch_versions.mix(BAM_TO_CRAM.out.versions) emit: - cram = BAM_TO_CRAM.out.cram + cram = BAM_TO_CRAM.out.cram_converted qc = qc_reports versions = ch_versions // channel: [ versions.yml ] diff --git a/subworkflows/nf-core/gatk4/markduplicates_spark/main.nf b/subworkflows/nf-core/gatk4/markduplicates_spark/main.nf index ac010b5adc..0b0fb30bf4 100644 --- a/subworkflows/nf-core/gatk4/markduplicates_spark/main.nf +++ 
b/subworkflows/nf-core/gatk4/markduplicates_spark/main.nf @@ -33,13 +33,13 @@ workflow MARKDUPLICATES_SPARK { .join(INDEX_MARKDUPLICATES.out.crai) // Convert Markupduplicates spark bam output to cram when running bamqc and/or deeptools - BAM_TO_CRAM(bam_bai, fasta, fasta_fai, intervals_combined_bed_gz_tbi) + BAM_TO_CRAM(bam_bai, Channel.empty(), fasta, fasta_fai, intervals_combined_bed_gz_tbi) // Only one of these channel is not empty: // - running Markupduplicates spark with bam output // - running Markupduplicates spark with cram output cram_markduplicates = Channel.empty().mix( - BAM_TO_CRAM.out.cram, + BAM_TO_CRAM.out.cram_converted, cram_crai) // When running Marduplicates spark, and saving reports diff --git a/tests/csv/3.0/mapped_single.csv b/tests/csv/3.0/mapped_single_bam.csv similarity index 100% rename from tests/csv/3.0/mapped_single.csv rename to tests/csv/3.0/mapped_single_bam.csv diff --git a/tests/csv/3.0/mapped_single_cram.csv b/tests/csv/3.0/mapped_single_cram.csv new file mode 100644 index 0000000000..1baa471c41 --- /dev/null +++ b/tests/csv/3.0/mapped_single_cram.csv @@ -0,0 +1,2 @@ +patient,status,sample,cram,crai +test,0,test,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai diff --git a/tests/csv/3.0/prepare_recalibration_single_bam.csv b/tests/csv/3.0/prepare_recalibration_single_bam.csv new file mode 100644 index 0000000000..a61c3f8222 --- /dev/null +++ b/tests/csv/3.0/prepare_recalibration_single_bam.csv @@ -0,0 +1,2 @@ +patient,status,sample,bam,bai,table 
+test,0,test,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test.baserecalibrator.table diff --git a/tests/csv/3.0/prepare_recalibration_single_cram.csv b/tests/csv/3.0/prepare_recalibration_single_cram.csv new file mode 100644 index 0000000000..4adc8fa105 --- /dev/null +++ b/tests/csv/3.0/prepare_recalibration_single_cram.csv @@ -0,0 +1,2 @@ +patient,status,sample,cram,crai,table +test,0,test,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test.baserecalibrator.table diff --git a/tests/csv/3.0/recalibrated.csv b/tests/csv/3.0/recalibrated.csv index 407a3b4cb5..17d912e966 100644 --- a/tests/csv/3.0/recalibrated.csv +++ b/tests/csv/3.0/recalibrated.csv @@ -1,6 +1,5 @@ patient,gender,status,sample,cram,crai test,XX,0,sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai -test2,XX,0,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai 
-test2,XX,1,sample3,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai +test1,XX,1,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai +test3,XX,0,sample3,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai test3,XX,1,sample4,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai -test3,XX,1,sample5,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai diff --git a/tests/subworkflows/nf-core/markduplicates/test.yml b/tests/subworkflows/nf-core/markduplicates/test.yml index 5484183205..35deb59d70 100644 --- a/tests/subworkflows/nf-core/markduplicates/test.yml +++ b/tests/subworkflows/nf-core/markduplicates/test.yml @@ -1,7 +1,7 @@ - name: subworkflow markduplicates command: nextflow run ./tests/subworkflows/nf-core/markduplicates/ -entry 
test_markduplicates -c tests/config/nextflow.config tags: - - markduplicates + - markduplicates_subworkflow - gatk4 files: - path: output/preprocessing/test/markduplicates/test.md.bam diff --git a/tests/test_default.yml b/tests/test_default.yml index 7bff088d69..371e805215 100644 --- a/tests/test_default.yml +++ b/tests/test_default.yml @@ -50,3 +50,33 @@ - path: results/reports/samtools_stats/test/test.md.cram.stats - path: results/reports/samtools_stats/test/test.recal.cram.stats - path: results/reports/deeptools/test/test.bigWig + +- name: Run default pipeline with VC + command: nextflow run main.nf -profile test,docker --tools strelka + tags: + - default + - preprocessing + files: + - path: results/multiqc + - path: results/preprocessing/test/markduplicates/test.md.cram + - path: results/preprocessing/test/markduplicates/test.md.cram.crai + - path: results/preprocessing/test/recal_table/test.recal.table + - path: results/preprocessing/test/recalibrated/test.recal.cram + - path: results/preprocessing/test/recalibrated/test.recal.cram.crai + - path: results/preprocessing/csv/markduplicates.csv + - path: results/preprocessing/csv/markduplicates_test.csv + - path: results/preprocessing/csv/markduplicates_no_table.csv + - path: results/preprocessing/csv/markduplicates_no_table_test.csv + - path: results/preprocessing/csv/recalibrated.csv + - path: results/preprocessing/csv/recalibrated_test.csv + - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.metrics + - path: results/reports/qualimap/test/test.mapped + - path: results/reports/qualimap/test/test.recal + - path: results/reports/samtools_stats/test/test.md.cram.stats + - path: results/reports/samtools_stats/test/test.recal.cram.stats + - path: results/reports/deeptools/test/test.bigWig + - path: results/variant_calling/test/strelka/test.genome.vcf.gz + - path: results/variant_calling/test/strelka/test.genome.vcf.gz.tbi + - path: 
results/variant_calling/test/strelka/test.variants.vcf.gz + - path: results/variant_calling/test/strelka/test.variants.vcf.gz.tbi diff --git a/tests/test_markduplicates.yml b/tests/test_markduplicates.yml new file mode 100644 index 0000000000..a2f510de51 --- /dev/null +++ b/tests/test_markduplicates.yml @@ -0,0 +1,55 @@ +- name: Run Mark Duplicates starting from BAM + command: nextflow run main.nf -profile test,markduplicates_bam,docker --tools strelka + tags: + - bam + - markduplicates + - preprocessing + files: + - path: results/multiqc + - path: results/preprocessing/test/markduplicates/test.md.cram + - path: results/preprocessing/test/markduplicates/test.md.cram.crai + - path: results/preprocessing/test/recal_table/test.recal.table + - path: results/preprocessing/test/recalibrated/test.recal.cram + - path: results/preprocessing/test/recalibrated/test.recal.cram.crai + - path: results/preprocessing/csv/markduplicates.csv + - path: results/preprocessing/csv/markduplicates_test.csv + - path: results/preprocessing/csv/markduplicates_no_table.csv + - path: results/preprocessing/csv/markduplicates_no_table_test.csv + - path: results/preprocessing/csv/recalibrated.csv + - path: results/preprocessing/csv/recalibrated_test.csv + - path: results/reports/qualimap/test/test.mapped + - path: results/reports/qualimap/test/test.recal + - path: results/reports/samtools_stats/test/test.md.cram.stats + - path: results/reports/samtools_stats/test/test.recal.cram.stats + - path: results/variant_calling/test/strelka/test.genome.vcf.gz + - path: results/variant_calling/test/strelka/test.genome.vcf.gz.tbi + - path: results/variant_calling/test/strelka/test.variants.vcf.gz + - path: results/variant_calling/test/strelka/test.variants.vcf.gz.tbi + +- name: Run Mark Duplicates starting from CRAM + command: nextflow run main.nf -profile test,markduplicates_cram,docker --tools strelka + tags: + - cram + - markduplicates + - preprocessing + files: + - path: results/multiqc + - path: 
results/preprocessing/test/markduplicates/test.md.cram + - path: results/preprocessing/test/markduplicates/test.md.cram.crai + - path: results/preprocessing/test/recal_table/test.recal.table + - path: results/preprocessing/test/recalibrated/test.recal.cram + - path: results/preprocessing/test/recalibrated/test.recal.cram.crai + - path: results/preprocessing/csv/markduplicates.csv + - path: results/preprocessing/csv/markduplicates_test.csv + - path: results/preprocessing/csv/markduplicates_no_table.csv + - path: results/preprocessing/csv/markduplicates_no_table_test.csv + - path: results/preprocessing/csv/recalibrated.csv + - path: results/preprocessing/csv/recalibrated_test.csv + - path: results/reports/qualimap/test/test.mapped + - path: results/reports/qualimap/test/test.recal + - path: results/reports/samtools_stats/test/test.md.cram.stats + - path: results/reports/samtools_stats/test/test.recal.cram.stats + - path: results/variant_calling/test/strelka/test.genome.vcf.gz + - path: results/variant_calling/test/strelka/test.genome.vcf.gz.tbi + - path: results/variant_calling/test/strelka/test.variants.vcf.gz + - path: results/variant_calling/test/strelka/test.variants.vcf.gz.tbi diff --git a/tests/test_only_paired_VC.yml b/tests/test_only_paired_VC.yml index 6c67407385..a8d4882b1c 100644 --- a/tests/test_only_paired_VC.yml +++ b/tests/test_only_paired_VC.yml @@ -3,7 +3,6 @@ tags: - somatic - strelka - - variant_calling - variantcalling_channel files: - path: results/variant_calling/sample1/strelka/sample1.variants.vcf.gz @@ -32,7 +31,6 @@ tags: - somatic - strelka - - variant_calling - variantcalling_channel files: - path: results/variant_calling/sample1/strelka/sample1.variants.vcf.gz diff --git a/tests/test_prepare_recalibration.yml b/tests/test_prepare_recalibration.yml index f35a05ea73..87e6ea8466 100644 --- a/tests/test_prepare_recalibration.yml +++ b/tests/test_prepare_recalibration.yml @@ -1,22 +1,99 @@ -- name: Run Prepare_recal - command: nextflow run 
main.nf -profile test,prepare_recalibration,docker +- name: Run Prepare_recal starting from bam + command: nextflow run main.nf -profile test,prepare_recalibration_bam,docker --tools strelka tags: + - bam + - prepare_recalibration + - preprocessing + files: + - path: results/multiqc + - path: results/preprocessing/test/recal_table/test.recal.table + - path: results/preprocessing/test/recalibrated/test.recal.cram + - path: results/preprocessing/test/recalibrated/test.recal.cram.crai + - path: results/preprocessing/csv/recalibrated.csv + - path: results/preprocessing/csv/recalibrated_test.csv + - path: results/reports/qualimap/test/test.recal + - path: results/reports/samtools_stats/test/test.recal.cram.stats + - path: results/variant_calling/test/strelka/test.genome.vcf.gz + - path: results/variant_calling/test/strelka/test.genome.vcf.gz.tbi + - path: results/variant_calling/test/strelka/test.variants.vcf.gz + - path: results/variant_calling/test/strelka/test.variants.vcf.gz.tbi + +- name: Run Prepare_recal starting from cram + command: nextflow run main.nf -profile test,prepare_recalibration_cram,docker --tools strelka + tags: + - cram + - prepare_recalibration + - preprocessing + files: + - path: results/multiqc + - path: results/preprocessing/test/recal_table/test.recal.table + - path: results/preprocessing/test/recalibrated/test.recal.cram + - path: results/preprocessing/test/recalibrated/test.recal.cram.crai + - path: results/preprocessing/csv/recalibrated.csv + - path: results/preprocessing/csv/recalibrated_test.csv + - path: results/reports/qualimap/test/test.recal + - path: results/reports/samtools_stats/test/test.recal.cram.stats + - path: results/variant_calling/test/strelka/test.genome.vcf.gz + - path: results/variant_calling/test/strelka/test.genome.vcf.gz.tbi + - path: results/variant_calling/test/strelka/test.variants.vcf.gz + - path: results/variant_calling/test/strelka/test.variants.vcf.gz.tbi + +- name: Run Prepare_recal starting from bam and skip 
baserecalibration + command: nextflow run main.nf -profile test,prepare_recalibration_bam,skip_bqsr,docker --tools strelka + tags: + - bam + - prepare_recalibration + - preprocessing + files: + - path: results/multiqc + - path: results/preprocessing/test/markduplicates/test.md.cram + - path: results/preprocessing/test/markduplicates/test.md.cram.crai + - path: results/preprocessing/test/recal_table/test.recal.table + should_exist: false + - path: results/preprocessing/test/recalibrated/test.recal.cram + should_exist: false + - path: results/preprocessing/test/recalibrated/test.recal.cram.crai + should_exist: false + - path: results/preprocessing/csv/recalibrated.csv + should_exist: false + - path: results/preprocessing/csv/recalibrated_test.csv + should_exist: false + - path: results/reports/qualimap/test/test.recal + should_exist: false + - path: results/reports/samtools_stats/test/test.recal.cram.stats + should_exist: false + - path: results/variant_calling/test/strelka/test.genome.vcf.gz + - path: results/variant_calling/test/strelka/test.genome.vcf.gz.tbi + - path: results/variant_calling/test/strelka/test.variants.vcf.gz + - path: results/variant_calling/test/strelka/test.variants.vcf.gz.tbi + +- name: Run Prepare_recal starting from cram and skip baserecalibration + command: nextflow run main.nf -profile test,prepare_recalibration_cram,skip_bqsr,docker --tools strelka + tags: + - cram - prepare_recalibration - preprocessing files: - path: results/multiqc - path: results/preprocessing/test/markduplicates/test.md.cram + should_exist: false - path: results/preprocessing/test/markduplicates/test.md.cram.crai + should_exist: false - path: results/preprocessing/test/recal_table/test.recal.table + should_exist: false - path: results/preprocessing/test/recalibrated/test.recal.cram + should_exist: false - path: results/preprocessing/test/recalibrated/test.recal.cram.crai - - path: results/preprocessing/csv/markduplicates.csv - - path: 
results/preprocessing/csv/markduplicates_test.csv - - path: results/preprocessing/csv/markduplicates_no_table.csv - - path: results/preprocessing/csv/markduplicates_no_table_test.csv + should_exist: false - path: results/preprocessing/csv/recalibrated.csv + should_exist: false - path: results/preprocessing/csv/recalibrated_test.csv - - path: results/reports/qualimap/test/test.mapped + should_exist: false - path: results/reports/qualimap/test/test.recal - - path: results/reports/samtools_stats/test/test.md.cram.stats + should_exist: false - path: results/reports/samtools_stats/test/test.recal.cram.stats + should_exist: false + - path: results/variant_calling/test/strelka/test.genome.vcf.gz + - path: results/variant_calling/test/strelka/test.genome.vcf.gz.tbi + - path: results/variant_calling/test/strelka/test.variants.vcf.gz + - path: results/variant_calling/test/strelka/test.variants.vcf.gz.tbi diff --git a/tests/test_recalibrate.yml b/tests/test_recalibrate.yml new file mode 100644 index 0000000000..0f67a36610 --- /dev/null +++ b/tests/test_recalibrate.yml @@ -0,0 +1,97 @@ +- name: Run Recalibration starting from bam + command: nextflow run main.nf -profile test,recalibrate_bam,docker --tools strelka + tags: + - bam + - recalibrate + - preprocessing + files: + - path: results/multiqc + - path: results/preprocessing/test/recalibrated/test.recal.cram + - path: results/preprocessing/test/recalibrated/test.recal.cram.crai + - path: results/reports/qualimap/test/test.recal + - path: results/reports/samtools_stats/test/test.recal.cram.stats + - path: results/preprocessing/csv/recalibrated.csv + - path: results/preprocessing/csv/recalibrated_test.csv + - path: results/variant_calling/test/strelka/test.genome.vcf.gz + - path: results/variant_calling/test/strelka/test.genome.vcf.gz.tbi + - path: results/variant_calling/test/strelka/test.variants.vcf.gz + - path: results/variant_calling/test/strelka/test.variants.vcf.gz.tbi + +- name: Run Recalibration starting from cram + 
command: nextflow run main.nf -profile test,recalibrate_cram,docker --tools strelka + tags: + - cram + - recalibrate + - preprocessing + files: + - path: results/multiqc + - path: results/preprocessing/test/recalibrated/test.recal.cram + - path: results/preprocessing/test/recalibrated/test.recal.cram.crai + - path: results/reports/qualimap/test/test.recal + - path: results/reports/samtools_stats/test/test.recal.cram.stats + - path: results/preprocessing/csv/recalibrated.csv + - path: results/preprocessing/csv/recalibrated_test.csv + - path: results/variant_calling/test/strelka/test.genome.vcf.gz + - path: results/variant_calling/test/strelka/test.genome.vcf.gz.tbi + - path: results/variant_calling/test/strelka/test.variants.vcf.gz + - path: results/variant_calling/test/strelka/test.variants.vcf.gz.tbi + +- name: Run Recalibration starting from bam and skip baserecalibration + command: nextflow run main.nf -profile test,recalibrate_bam,skip_bqsr,docker --tools strelka + tags: + - bam + - recalibrate + - preprocessing + files: + - path: results/multiqc + - path: results/preprocessing/test/markduplicates/test.md.cram + - path: results/preprocessing/test/markduplicates/test.md.cram.crai + - path: results/preprocessing/test/recal_table/test.recal.table + should_exist: false + - path: results/preprocessing/test/recalibrated/test.recal.cram + should_exist: false + - path: results/preprocessing/test/recalibrated/test.recal.cram.crai + should_exist: false + - path: results/preprocessing/csv/recalibrated.csv + should_exist: false + - path: results/preprocessing/csv/recalibrated_test.csv + should_exist: false + - path: results/reports/qualimap/test/test.recal + should_exist: false + - path: results/reports/samtools_stats/test/test.recal.cram.stats + should_exist: false + - path: results/variant_calling/test/strelka/test.genome.vcf.gz + - path: results/variant_calling/test/strelka/test.genome.vcf.gz.tbi + - path: results/variant_calling/test/strelka/test.variants.vcf.gz + - 
path: results/variant_calling/test/strelka/test.variants.vcf.gz.tbi + +- name: Run Recalibration starting from cram and skip baserecalibration + command: nextflow run main.nf -profile test,recalibrate_cram,skip_bqsr,docker --tools strelka + tags: + - cram + - recalibrate + - preprocessing + files: + - path: results/multiqc + - path: results/preprocessing/test/markduplicates/test.md.cram + should_exist: false + - path: results/preprocessing/test/markduplicates/test.md.cram.crai + should_exist: false + - path: results/preprocessing/test/recal_table/test.recal.table + should_exist: false + - path: results/preprocessing/test/recalibrated/test.recal.cram + should_exist: false + - path: results/preprocessing/test/recalibrated/test.recal.cram.crai + should_exist: false + - path: results/preprocessing/csv/recalibrated.csv + should_exist: false + - path: results/preprocessing/csv/recalibrated_test.csv + should_exist: false + - path: results/reports/qualimap/test/test.recal + should_exist: false + - path: results/reports/samtools_stats/test/test.recal.cram.stats + should_exist: false + - path: results/variant_calling/test/strelka/test.genome.vcf.gz + - path: results/variant_calling/test/strelka/test.genome.vcf.gz.tbi + - path: results/variant_calling/test/strelka/test.variants.vcf.gz + - path: results/variant_calling/test/strelka/test.variants.vcf.gz.tbi diff --git a/tests/test_skip_markduplicates.yml b/tests/test_skip_markduplicates.yml index fd6f9cf634..52004f66d6 100644 --- a/tests/test_skip_markduplicates.yml +++ b/tests/test_skip_markduplicates.yml @@ -1,7 +1,6 @@ - name: Run default pipeline with skipping MarkDuplicates - command: nextflow run main.nf -profile test,docker,skip_markduplicates + command: nextflow run main.nf -profile test,docker,skip_markduplicates --tools strelka tags: - - markduplicates - preprocessing - skip_markduplicates files: @@ -9,14 +8,14 @@ - path: results/preprocessing/test/mapped/test.bam - path: results/preprocessing/test/mapped/test.bam.bai 
- path: results/preprocessing/test/recal_table/test.recal.table - - path: results/preprocessing/test/markduplicates/test.md.cram - - path: results/preprocessing/test/markduplicates/test.md.cram.crai + - path: results/preprocessing/test/markduplicates/test.md.cram # These are not actually duplicate marked crams, but just mapped converted crams TODO renaming + - path: results/preprocessing/test/markduplicates/test.md.cram.crai # TODO These are not actually duplicate marked crams, but just mapped converted crams TODO renaming - path: results/preprocessing/test/recalibrated/test.recal.cram - path: results/preprocessing/test/recalibrated/test.recal.cram.crai - path: results/preprocessing/csv/markduplicates.csv - path: results/preprocessing/csv/markduplicates_test.csv - - path: results/preprocessing/csv/markduplicates_no_table.csv - - path: results/preprocessing/csv/markduplicates_no_table_test.csv + - path: results/preprocessing/csv/markduplicates_no_table.csv # TODO These are not actually duplicate marked crams, but just mapped converted crams TODO renaming + - path: results/preprocessing/csv/markduplicates_no_table_test.csv # TODO These are not actually duplicate marked crams, but just mapped converted crams TODO renaming - path: results/preprocessing/csv/recalibrated.csv - path: results/preprocessing/csv/recalibrated_test.csv - path: results/reports/fastqc/test-test_L1 @@ -25,29 +24,71 @@ - path: results/reports/samtools_stats/test/test.md.cram.stats - path: results/reports/samtools_stats/test/test.recal.cram.stats - path: results/reports/deeptools/test/test.bigWig + - path: results/variant_calling/test/strelka/test.genome.vcf.gz + - path: results/variant_calling/test/strelka/test.genome.vcf.gz.tbi + - path: results/variant_calling/test/strelka/test.variants.vcf.gz + - path: results/variant_calling/test/strelka/test.variants.vcf.gz.tbi -- name: Run skip markduplicates preparerecal - command: nextflow run main.nf -profile
test,docker,prepare_recalibration,skip_markduplicates +- name: Run skip markduplicates bam from step markduplicates + command: nextflow run main.nf -profile test,docker,markduplicates_bam,skip_markduplicates --tools strelka tags: - - markduplicates - - prepare_recalibration + - bam - preprocessing - skip_markduplicates files: - path: results/multiqc + - path: results/preprocessing/test/markduplicates/test.md.cram # TODO These are not actually duplicate marked crams, but just mapped converted crams TODO renaming + - path: results/preprocessing/test/markduplicates/test.md.cram.crai # TODO These are not actually duplicate marked crams, but just mapped converted crams TODO renaming - path: results/preprocessing/test/recal_table/test.recal.table + - path: results/preprocessing/test/recalibrated/test.recal.cram + - path: results/preprocessing/test/recalibrated/test.recal.cram.crai + - path: results/preprocessing/csv/recalibrated.csv + - path: results/preprocessing/csv/recalibrated_test.csv + - path: results/preprocessing/csv/markduplicates.csv + - path: results/preprocessing/csv/markduplicates_test.csv + - path: results/preprocessing/csv/markduplicates_no_table.csv # TODO These are not actually duplicate marked crams, but just mapped converted crams TODO renaming + - path: results/preprocessing/csv/markduplicates_no_table_test.csv # TODO These are not actually duplicate marked crams, but just mapped converted crams TODO renaming + - path: results/reports/qualimap/test/test.mapped + - path: results/reports/qualimap/test/test.recal + - path: results/reports/samtools_stats/test/test.md.cram.stats + - path: results/reports/samtools_stats/test/test.recal.cram.stats + - path: results/reports/deeptools/test/test.bigWig + - path: results/variant_calling/test/strelka/test.genome.vcf.gz + - path: results/variant_calling/test/strelka/test.genome.vcf.gz.tbi + - path: results/variant_calling/test/strelka/test.variants.vcf.gz + - path:
results/variant_calling/test/strelka/test.variants.vcf.gz.tbi + +- name: Run skip markduplicates cram from step markduplicates + command: nextflow run main.nf -profile test,docker,markduplicates_cram,skip_markduplicates --tools strelka + tags: + - cram + - preprocessing + - skip_markduplicates + files: + - path: results/multiqc - path: results/preprocessing/test/markduplicates/test.md.cram + should_exist: false - path: results/preprocessing/test/markduplicates/test.md.cram.crai + should_exist: false + - path: results/preprocessing/test/recal_table/test.recal.table - path: results/preprocessing/test/recalibrated/test.recal.cram - path: results/preprocessing/test/recalibrated/test.recal.cram.crai + - path: results/preprocessing/csv/recalibrated.csv + - path: results/preprocessing/csv/recalibrated_test.csv - path: results/preprocessing/csv/markduplicates.csv + should_exist: false - path: results/preprocessing/csv/markduplicates_test.csv + should_exist: false - path: results/preprocessing/csv/markduplicates_no_table.csv + should_exist: false - path: results/preprocessing/csv/markduplicates_no_table_test.csv - - path: results/preprocessing/csv/recalibrated.csv - - path: results/preprocessing/csv/recalibrated_test.csv - # - path: results/reports/qualimap/test/test.mapped - # - path: results/reports/qualimap/test/test.recal - - path: results/reports/samtools_stats/test/test.md.cram.stats + should_exist: false + - path: results/reports/qualimap/test/test.mapped + - path: results/reports/qualimap/test/test.recal + - path: results/reports/samtools_stats/test/test.paired_end.sorted.cram.stats - path: results/reports/samtools_stats/test/test.recal.cram.stats - path: results/reports/deeptools/test/test.bigWig + - path: results/variant_calling/test/strelka/test.genome.vcf.gz + - path: results/variant_calling/test/strelka/test.genome.vcf.gz.tbi + - path: results/variant_calling/test/strelka/test.variants.vcf.gz + - path: 
results/variant_calling/test/strelka/test.variants.vcf.gz.tbi diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 755565c931..ad2e161937 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -49,6 +49,7 @@ else { log.warn "No samplesheet specified, attempting to restart from csv files present in ${params.outdir}" switch (params.step) { case 'mapping': exit 1, "Can't start with step $params.step without samplesheet" + //case 'markduplicates': csv_file = file("${params.outdir}/preprocessing/csv/markduplicates_no_table.csv", checkIfExists: true); break case 'prepare_recalibration': csv_file = file("${params.outdir}/preprocessing/csv/markduplicates_no_table.csv", checkIfExists: true); break case 'recalibrate': csv_file = file("${params.outdir}/preprocessing/csv/markduplicates.csv", checkIfExists: true); break case 'variant_calling': csv_file = file("${params.outdir}/preprocessing/csv/recalibrated.csv", checkIfExists: true); break @@ -116,83 +117,85 @@ umi_read_structure = params.umi_read_structure ? 
"${params.umi_read_structure} $ */ // Create samplesheets to restart from different steps -include { MAPPING_CSV } from '../subworkflows/local/mapping_csv' -include { MARKDUPLICATES_CSV } from '../subworkflows/local/markduplicates_csv' -include { PREPARE_RECALIBRATION_CSV } from '../subworkflows/local/prepare_recalibration_csv' -include { RECALIBRATE_CSV } from '../subworkflows/local/recalibrate_csv' +include { MAPPING_CSV } from '../subworkflows/local/mapping_csv' +include { MARKDUPLICATES_CSV } from '../subworkflows/local/markduplicates_csv' +include { PREPARE_RECALIBRATION_CSV } from '../subworkflows/local/prepare_recalibration_csv' +include { RECALIBRATE_CSV } from '../subworkflows/local/recalibrate_csv' // Build indices if needed -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' // Build intervals if needed -include { PREPARE_INTERVALS } from '../subworkflows/local/prepare_intervals' +include { PREPARE_INTERVALS } from '../subworkflows/local/prepare_intervals' // Convert BAM files to FASTQ files -include { ALIGNMENT_TO_FASTQ as ALIGNMENT_TO_FASTQ_INPUT } from '../subworkflows/nf-core/alignment_to_fastq' -include { ALIGNMENT_TO_FASTQ as ALIGNMENT_TO_FASTQ_UMI } from '../subworkflows/nf-core/alignment_to_fastq' +include { ALIGNMENT_TO_FASTQ as ALIGNMENT_TO_FASTQ_INPUT } from '../subworkflows/nf-core/alignment_to_fastq' +include { ALIGNMENT_TO_FASTQ as ALIGNMENT_TO_FASTQ_UMI } from '../subworkflows/nf-core/alignment_to_fastq' // Split FASTQ files -include { SPLIT_FASTQ } from '../subworkflows/local/split_fastq' +include { SPLIT_FASTQ } from '../subworkflows/local/split_fastq' // Run FASTQC -include { RUN_FASTQC } from '../subworkflows/nf-core/run_fastqc' +include { RUN_FASTQC } from '../subworkflows/nf-core/run_fastqc' // Run TRIMGALORE -include { RUN_TRIMGALORE } from '../subworkflows/nf-core/run_trimgalore' +include { RUN_TRIMGALORE } from 
'../subworkflows/nf-core/run_trimgalore' // Create umi consensus bams from fastq -include { CREATE_UMI_CONSENSUS } from '../subworkflows/nf-core/fgbio_create_umi_consensus/main' +include { CREATE_UMI_CONSENSUS } from '../subworkflows/nf-core/fgbio_create_umi_consensus/main' // Map input reads to reference genome -include { GATK4_MAPPING } from '../subworkflows/nf-core/gatk4/mapping/main' +include { GATK4_MAPPING } from '../subworkflows/nf-core/gatk4/mapping/main' // Merge and index BAM files (optional) -include { MERGE_INDEX_BAM } from '../subworkflows/nf-core/merge_index_bam' +include { MERGE_INDEX_BAM } from '../subworkflows/nf-core/merge_index_bam' +include { SAMTOOLS_CONVERT as SAMTOOLS_CRAMTOBAM } from '../modules/nf-core/modules/samtools/convert/main' +include { SAMTOOLS_CONVERT as SAMTOOLS_BAMTOCRAM } from '../modules/nf-core/modules/samtools/convert/main' +include { SAMTOOLS_CONVERT as SAMTOOLS_BAMTOCRAM_VARIANTCALLING} from '../modules/nf-core/modules/samtools/convert/main' // Mark Duplicates (+QC) -include { MARKDUPLICATES } from '../subworkflows/nf-core/gatk4/markduplicates/main' +include { MARKDUPLICATES } from '../subworkflows/nf-core/gatk4/markduplicates/main' // Mark Duplicates SPARK (+QC) -include { MARKDUPLICATES_SPARK } from '../subworkflows/nf-core/gatk4/markduplicates_spark/main' +include { MARKDUPLICATES_SPARK } from '../subworkflows/nf-core/gatk4/markduplicates_spark/main' // Convert to CRAM (+QC) -include { BAM_TO_CRAM } from '../subworkflows/nf-core/bam_to_cram' +include { BAM_TO_CRAM } from '../subworkflows/nf-core/bam_to_cram' // QC on CRAM -include { SAMTOOLS_STATS as SAMTOOLS_STATS_CRAM } from '../modules/nf-core/modules/samtools/stats/main' -include { CRAM_QC } from '../subworkflows/nf-core/cram_qc' +include { CRAM_QC } from '../subworkflows/nf-core/cram_qc' // Create recalibration tables -include { PREPARE_RECALIBRATION } from '../subworkflows/nf-core/gatk4/prepare_recalibration/main' +include { PREPARE_RECALIBRATION } from 
'../subworkflows/nf-core/gatk4/prepare_recalibration/main' // Create recalibration tables SPARK -include { PREPARE_RECALIBRATION_SPARK } from '../subworkflows/nf-core/gatk4/prepare_recalibration_spark/main' +include { PREPARE_RECALIBRATION_SPARK } from '../subworkflows/nf-core/gatk4/prepare_recalibration_spark/main' // Create recalibrated cram files to use for variant calling (+QC) -include { RECALIBRATE } from '../subworkflows/nf-core/gatk4/recalibrate/main' +include { RECALIBRATE } from '../subworkflows/nf-core/gatk4/recalibrate/main' // Create recalibrated cram files to use for variant calling (+QC) -include { RECALIBRATE_SPARK } from '../subworkflows/nf-core/gatk4/recalibrate_spark/main' +include { RECALIBRATE_SPARK } from '../subworkflows/nf-core/gatk4/recalibrate_spark/main' // Variant calling on a single normal sample -include { GERMLINE_VARIANT_CALLING } from '../subworkflows/local/germline_variant_calling' +include { GERMLINE_VARIANT_CALLING } from '../subworkflows/local/germline_variant_calling' // Variant calling on a single tumor sample -include { TUMOR_ONLY_VARIANT_CALLING } from '../subworkflows/local/tumor_variant_calling' +include { TUMOR_ONLY_VARIANT_CALLING } from '../subworkflows/local/tumor_variant_calling' // Variant calling on tumor/normal pair -include { PAIR_VARIANT_CALLING } from '../subworkflows/local/pair_variant_calling' +include { PAIR_VARIANT_CALLING } from '../subworkflows/local/pair_variant_calling' -include { VCF_QC } from '../subworkflows/nf-core/vcf_qc' +include { VCF_QC } from '../subworkflows/nf-core/vcf_qc' // Annotation -include { ANNOTATE } from '../subworkflows/local/annotate' +include { ANNOTATE } from '../subworkflows/local/annotate' // REPORTING VERSIONS OF SOFTWARE USED -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' // MULTIQC -include { MULTIQC } from 
'../modules/nf-core/modules/multiqc/main' +include { MULTIQC } from '../modules/nf-core/modules/multiqc/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -379,7 +382,6 @@ workflow SAREK { new_meta.id = meta.sample // update data_type - //TODO: This is never used again as far as I see, could probably be removed new_meta.data_type = 'bam' // Use groupKey to make sure that the correct group can advance as soon as it is complete @@ -410,30 +412,62 @@ workflow SAREK { ch_versions = ch_versions.mix(GATK4_MAPPING.out.versions) } - if (params.step in ['mapping', 'prepare_recalibration']) { - ch_cram_markduplicates_no_spark = Channel.empty() - ch_cram_markduplicates_spark = Channel.empty() - ch_cram_no_markduplicates = Channel.empty() + if (params.step in ['mapping', 'markduplicates']) { + + // 1. SAMTOOLS_CRAMTOBAM ( to speed up computation) + // 2. Need fasta for cram compression (maybe just using --fasta, because this reference will be used elsewhere) + ch_cram_no_markduplicates_restart = Channel.empty() + ch_cram_markduplicates_no_spark = Channel.empty() + ch_cram_markduplicates_spark = Channel.empty() // STEP 2: markduplicates (+QC) + convert to CRAM // ch_bam_for_markduplicates will countain bam mapped with GATK4_MAPPING when step is mapping // Or bams that are specified in the samplesheet.csv when step is prepare_recalibration - ch_bam_for_markduplicates = params.step == 'mapping' ? ch_bam_mapped : ch_input_sample.map{ meta, bam, bai -> [meta, bam] } + // ch_bam_for_markduplicates = params.step == 'mapping'? 
ch_bam_mapped : ch_input_sample.map{ meta, input, index -> [meta, input] } + + ch_bam_for_markduplicates = Channel.empty() + ch_input_cram_indexed = Channel.empty() + + if(params.step == 'mapping'){ + + ch_bam_for_markduplicates = ch_bam_mapped + + }else{ + + ch_input_sample.map{ meta, input, index -> [meta, input, index] }.branch{ + bam: it[0].data_type == "bam" + cram: it[0].data_type == "cram" + }.set{convert} + + ch_bam_for_markduplicates = ch_bam_for_markduplicates.mix(convert.bam) + + //In case Markduplicates is run convert CRAM files to BAM, because the tool only runs on BAM files. MD_SPARK does run on CRAM but is a lot slower + if (!(params.skip_tools && params.skip_tools.contains('markduplicates'))){ + + SAMTOOLS_CRAMTOBAM(convert.cram, fasta, fasta_fai) + ch_versions = ch_versions.mix(SAMTOOLS_CRAMTOBAM.out.versions) + + ch_bam_for_markduplicates = ch_bam_for_markduplicates.mix(SAMTOOLS_CRAMTOBAM.out.alignment_index.map{ meta, bam, bai -> [meta, bam]}) + }else{ + ch_input_cram_indexed = convert.cram + } + } if (params.skip_tools && params.skip_tools.contains('markduplicates')) { // ch_bam_indexed will countain bam mapped with GATK4_MAPPING when step is mapping // which are then merged and indexed // Or bams that are specified in the samplesheet.csv when step is prepare_recalibration - ch_bam_indexed = params.step == 'mapping' ? MERGE_INDEX_BAM.out.bam_bai : ch_input_sample + ch_bam_indexed = params.step == 'mapping' ? 
MERGE_INDEX_BAM.out.bam_bai : convert.bam BAM_TO_CRAM(ch_bam_indexed, + ch_input_cram_indexed, fasta, fasta_fai, intervals_for_preprocessing) - ch_cram_no_markduplicates = BAM_TO_CRAM.out.cram + ch_cram_no_markduplicates_restart = Channel.empty().mix(BAM_TO_CRAM.out.cram_converted) // Gather QC reports ch_reports = ch_reports.mix(BAM_TO_CRAM.out.qc.collect{it[1]}.ifEmpty([])) @@ -468,26 +502,52 @@ workflow SAREK { ch_versions = ch_versions.mix(MARKDUPLICATES.out.versions) } - // ch_cram_for_prepare_recalibration contains either: + // ch_md_cram_for_restart contains either: // - crams from markduplicates // - crams from markduplicates_spark // - crams converted from bam mapped when skipping markduplicates - ch_cram_for_prepare_recalibration = Channel.empty().mix( + ch_md_cram_for_restart = Channel.empty().mix( ch_cram_markduplicates_no_spark, ch_cram_markduplicates_spark, - ch_cram_no_markduplicates) - - // Run Samtools stats on CRAM - SAMTOOLS_STATS_CRAM(ch_cram_for_prepare_recalibration, fasta) + ch_cram_no_markduplicates_restart).map{ meta, cram, crai -> + meta_new = meta.clone() + meta_new.data_type = "cram" //Make sure correct data types are carried through + [meta_new, cram, crai] + } // Create CSV to restart from this step - MARKDUPLICATES_CSV(ch_cram_for_prepare_recalibration) + MARKDUPLICATES_CSV(ch_md_cram_for_restart) + } - // Gather QC reports - ch_reports = ch_reports.mix(SAMTOOLS_STATS_CRAM.out.stats.collect{it[1]}.ifEmpty([])) + if (params.step in ['mapping', 'markduplicates', 'prepare_recalibration']) { - // Gather used softwares versions - ch_versions = ch_versions.mix(SAMTOOLS_STATS_CRAM.out.versions) + // Run if starting from step "prepare_recalibration" + if(params.step == 'prepare_recalibration'){ + + //Support if starting from BAM or CRAM files + ch_input_sample.branch{ + bam: it[0].data_type == "bam" + cram: it[0].data_type == "cram" + }.set{convert} + + //BAM files first must be converted to CRAM files since from this step on we base 
everything on CRAM format + SAMTOOLS_BAMTOCRAM(convert.bam, fasta, fasta_fai) + ch_versions = ch_versions.mix(SAMTOOLS_BAMTOCRAM.out.versions) + + ch_cram_for_prepare_recalibration = Channel.empty().mix(SAMTOOLS_BAMTOCRAM.out.alignment_index, convert.cram) + + ch_md_cram_for_restart = SAMTOOLS_BAMTOCRAM.out.alignment_index + + } else { + + // ch_cram_for_prepare_recalibration contains either: + // - crams from markduplicates + // - crams from markduplicates_spark + // - crams converted from bam mapped when skipping markduplicates + // - input cram files, when start from step markduplicates + //ch_md_cram_for_restart.view() //contains md.cram.crai + ch_cram_for_prepare_recalibration = Channel.empty().mix(ch_md_cram_for_restart, ch_input_cram_indexed) + } // STEP 3: Create recalibration tables if (!(params.skip_tools && params.skip_tools.contains('baserecalibrator'))) { @@ -508,6 +568,7 @@ workflow SAREK { // Gather used softwares versions ch_versions = ch_versions.mix(PREPARE_RECALIBRATION_SPARK.out.versions) } else { + PREPARE_RECALIBRATION(ch_cram_for_prepare_recalibration, dict, fasta, @@ -529,18 +590,40 @@ workflow SAREK { ch_table_bqsr_no_spark, ch_table_bqsr_spark) - // Create CSV to restart from this step - PREPARE_RECALIBRATION_CSV(ch_table_bqsr) - ch_reports = ch_reports.mix(ch_table_bqsr.map{ meta, table -> table}) + + ch_cram_applybqsr = ch_cram_for_prepare_recalibration.join(ch_table_bqsr) + + // Create CSV to restart from this step + PREPARE_RECALIBRATION_CSV(ch_md_cram_for_restart.join(ch_table_bqsr)) } } // STEP 4: RECALIBRATING - if (params.step in ['mapping', 'prepare_recalibration', 'recalibrate']) { + if (params.step in ['mapping', 'markduplicates', 'prepare_recalibration', 'recalibrate']) { + + // Run if starting from step "recalibrate" + if(params.step == 'recalibrate'){ + + //Support if starting from BAM or CRAM files + ch_input_sample.branch{ + bam: it[0].data_type == "bam" + cram: it[0].data_type == "cram" + }.set{convert} + + //If
BAM file, split up table and mapped file to convert BAM to CRAM + ch_bam_table = convert.bam.map{ meta, bam, bai, table -> [meta, table]} + ch_bam_bam = convert.bam.map{ meta, bam, bai, table -> [meta, bam, bai]} + + //BAM files first must be converted to CRAM files since from this step on we base everything on CRAM format + SAMTOOLS_BAMTOCRAM(ch_bam_bam, fasta, fasta_fai) + ch_versions = ch_versions.mix(SAMTOOLS_BAMTOCRAM.out.versions) + + ch_cram_applybqsr = Channel.empty().mix(SAMTOOLS_BAMTOCRAM.out.alignment_index.join(ch_bam_table), // Join together converted cram with input tables + convert.cram) + } if (!(params.skip_tools && params.skip_tools.contains('baserecalibrator'))) { - ch_cram_applybqsr = params.step == 'recalibrate' ? ch_input_sample : ch_cram_for_prepare_recalibration.join(ch_table_bqsr) ch_cram_variant_calling_no_spark = Channel.empty() ch_cram_variant_calling_spark = Channel.empty() @@ -587,11 +670,33 @@ workflow SAREK { // Gather used softwares versions ch_versions = ch_versions.mix(CRAM_QC.out.versions) - } else cram_variant_calling = ch_cram_for_prepare_recalibration - + } else if (params.step == 'recalibrate'){ + // ch_cram_variant_calling contains either: + // - input bams converted to crams, if started from step recal + skip BQSR + // - input crams if started from step recal + skip BQSR + cram_variant_calling = Channel.empty().mix(SAMTOOLS_BAMTOCRAM.out.alignment_index, + convert.cram.map{ meta, cram, crai, table -> [meta, cram, crai]}) + } else{ + // ch_cram_variant_calling contains either: + // - crams from markduplicates = ch_cram_for_prepare_recalibration if skip BQSR but not started from step recalibration + cram_variant_calling = Channel.empty().mix(ch_cram_for_prepare_recalibration) + } } - if (params.step == 'variant_calling') cram_variant_calling = ch_input_sample + if (params.step == 'variant_calling') { + + ch_input_sample.branch{ + bam: it[0].data_type == "bam" + cram: it[0].data_type == "cram" + }.set{convert} + + //BAM files 
first must be converted to CRAM files since from this step on we base everything on CRAM format + SAMTOOLS_BAMTOCRAM_VARIANTCALLING(convert.bam, fasta, fasta_fai) + ch_versions = ch_versions.mix(SAMTOOLS_BAMTOCRAM_VARIANTCALLING.out.versions) + + cram_variant_calling = Channel.empty().mix(SAMTOOLS_BAMTOCRAM_VARIANTCALLING.out.alignment_index, convert.cram) + + } if (params.tools) { @@ -846,10 +951,8 @@ def extract_csv(csv_file) { def read_group = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\"" meta.numLanes = numLanes.toInteger() meta.read_group = read_group.toString() - - println read_group - meta.data_type = "fastq" + meta.test = "test" meta.size = 1 // default number of splitted fastq return [meta, [fastq_1, fastq_2]] // start from BAM @@ -886,7 +989,7 @@ def extract_csv(csv_file) { def crai = file(row.crai, checkIfExists: true) meta.data_type = "cram" return [meta, cram, crai] - // prepare_recalibration when skipping MarkDuplicates + // prepare_recalibration when skipping MarkDuplicates or `--step markduplicates` } else if (row.bam) { meta.id = meta.sample def bam = file(row.bam, checkIfExists: true)