Skip to content

Commit

Permalink
Merge pull request #536 from FriederikeHanssen/step_md
Browse files Browse the repository at this point in the history
Add step markduplicates & allow BAM input for all steps
  • Loading branch information
FriederikeHanssen authored May 12, 2022
2 parents 3bcf3bb + e044d9b commit b76d09d
Show file tree
Hide file tree
Showing 30 changed files with 2,671 additions and 2,095 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,12 @@ jobs:
- "gatk4_spark"
- "haplotypecaller"
- "manta"
- "markduplicates"
- "mutect2"
- "msisensorpro"
# - 'save_bam_mapped'
- "prepare_recalibration"
- "recalibrate"
- "variantcalling_channel"
- "skip_markduplicates"
- "strelka"
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#512](https://github.com/nf-core/sarek/pull/512), [#531](https://github.com/nf-core/sarek/pull/531), [#537](https://github.com/nf-core/sarek/pull/537) - Subway map for pipeline
- [#522](https://github.com/nf-core/sarek/pull/522) - Add QC for vcf files & MultiQC
- [#533](https://github.com/nf-core/sarek/pull/533) - Add param `--only_paired_variant_calling` to allow skipping of germline variantcalling for paired samples
- [#536](https://github.com/nf-core/sarek/pull/536) - Add `--step markduplicates` to start from duplicate marking; `--step prepare_recalibration` now ONLY starts at process `BaseRecalibrator`; add `bam` and `cram` input support for `--step` `markduplicates`, `prepare_recalibration`, `recalibrate`, and `variant_calling`
- [#538](https://github.com/nf-core/sarek/pull/538) - Add param `--seq_platform`, default: `ILLUMINA`

### Changed
Expand Down
56 changes: 33 additions & 23 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ process {
}

withName: 'TABIX_DBSNP' {
ext.when = { !params.dbsnp_tbi && params.dbsnp && (params.step == "mapping" || params.step == "prepare_recalibration") || params.tools && (params.tools.contains('controlfreec') || params.tools.contains('haplotypecaller') || params.tools.contains('mutect2')) }
ext.when = { !params.dbsnp_tbi && params.dbsnp && (params.step == "mapping" || params.step == "markduplicates" || params.step == "prepare_recalibration") || params.tools && (params.tools.contains('controlfreec') || params.tools.contains('haplotypecaller') || params.tools.contains('mutect2')) }
publishDir = [
enabled: params.save_reference,
mode: params.publish_dir_mode,
Expand All @@ -111,7 +111,7 @@ process {
}

withName: 'TABIX_KNOWN_INDELS' {
ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == 'prepare_recalibration') }
ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration') }
publishDir = [
enabled: params.save_reference,
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -294,6 +294,32 @@ process {

// MARKDUPLICATES

// Extra command-line arguments for the CRAM -> BAM conversion process.
// NOTE(review): presumably forwarded to `samtools view`, where `-b` selects BAM output — confirm against the module script.
withName: 'SAMTOOLS_CRAMTOBAM'{
ext.args = "-b"
}

// Shared settings for every process whose name matches the pattern 'SAMTOOLS_BAMTOCRAM.*'.
withName: 'SAMTOOLS_BAMTOCRAM.*' {
// BAM files provided for step markduplicates are either run through MarkDuplicates or only converted, then saved as md.cram.
// BAM files provided for step prepare_recalibration are converted and run through BQSR, then saved as md.cram.
// BAM files provided for step recalibrate are converted and run through BQSR II, then saved as md.cram.
// NOTE(review): presumably forwarded to `samtools view`, where `-C` selects CRAM output — confirm against the module script.
ext.args = "-C"
// Output files are named "<sample id>.md".
ext.prefix = { "${meta.id}.md" }
// Publish only the CRAM and its index into the per-sample markduplicates folder.
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/preprocessing/${meta.id}/markduplicates" },
pattern: "*{cram,crai}"
]
}

// Settings for the BAM -> CRAM conversion named SAMTOOLS_BAMTOCRAM_VARIANTCALLING:
// output is named "<sample id>.recal" and published to the per-sample recalibrated folder.
// NOTE(review): this name also matches the 'SAMTOOLS_BAMTOCRAM.*' selector; presumably the values set here
// take precedence for ext.prefix and publishDir — confirm against Nextflow's selector priority rules.
withName: 'SAMTOOLS_BAMTOCRAM_VARIANTCALLING' {
ext.prefix = { "${meta.id}.recal" }
// Publish only the CRAM and its index.
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/preprocessing/${meta.id}/recalibrated" },
pattern: "*{cram,crai}"
]
}

withName: 'GATK4_ESTIMATELIBRARYCOMPLEXITY|GATK4_MARKDUPLICATES' {
ext.prefix = { "${meta.id}.md" }
publishDir = [
Expand Down Expand Up @@ -325,15 +351,6 @@ process {
ext.when = { !(params.skip_tools && params.skip_tools.contains('markduplicates')) }
}

withName: 'SAMTOOLS_BAMTOCRAM' {
ext.prefix = { "${meta.id}.md" }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/preprocessing/${meta.id}/markduplicates" },
pattern: "*{cram,crai}"
]
}

withName: 'INDEX_MARKDUPLICATES' {
publishDir = [
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -408,17 +425,6 @@ process {
]
}

withName: 'QUALIMAP_BAMQC' {
ext.args = '--paint-chromosome-limits --genome-gc-distr HUMAN -skip-duplicated --skip-dup-mode 0 -outformat HTML'
ext.prefix = { "${meta.id}.mapped" }
ext.when = { !(params.skip_tools && params.skip_tools.contains('bamqc')) }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/reports/qualimap/${meta.id}" },
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'SAMTOOLS_STATS' {
ext.when = { !(params.skip_tools && params.skip_tools.contains('samtools')) }
publishDir = [
Expand All @@ -439,7 +445,7 @@ process {

withName: 'QUALIMAP_BAMQCCRAM' {
ext.args = '--paint-chromosome-limits --genome-gc-distr HUMAN -skip-duplicated --skip-dup-mode 0 -outformat HTML'
ext.prefix = { "${meta.id}.recal" }
ext.prefix = { "${meta.id}.mapped" }
ext.when = { !(params.skip_tools && params.skip_tools.contains('bamqc')) }
publishDir = [
mode: params.publish_dir_mode,
Expand All @@ -448,6 +454,10 @@ process {
]
}

// Fully-qualified selector: only the QUALIMAP_BAMQCCRAM instance under NFCORE_SAREK:SAREK:CRAM_QC
// gets the "<sample id>.recal" output prefix.
withName: 'NFCORE_SAREK:SAREK:CRAM_QC:QUALIMAP_BAMQCCRAM' {
ext.prefix = { "${meta.id}.recal" }
}

withName: 'NFCORE_SAREK:SAREK:CRAM_QC:SAMTOOLS_STATS' {
ext.when = { !(params.skip_tools && params.skip_tools.contains('samtools')) }
publishDir = [
Expand Down
33 changes: 28 additions & 5 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,36 @@ profiles {
pair {
params.input = "${baseDir}/tests/csv/3.0/fastq_pair.csv"
}
prepare_recalibration {
params.input = "${baseDir}/tests/csv/3.0/mapped_single.csv"
// Test profiles that start the pipeline at a later step. Each one sets the input samplesheet and the
// matching `params.step`. The *_bam / *_cram suffix mirrors the samplesheet file name
// (presumably BAM vs. CRAM input files — confirm against the CSVs under tests/csv/3.0).

// Start at duplicate marking.
markduplicates_bam {
params.input = "${baseDir}/tests/csv/3.0/mapped_single_bam.csv"
params.step = 'markduplicates'
}
markduplicates_cram {
params.input = "${baseDir}/tests/csv/3.0/mapped_single_cram.csv"
params.step = 'markduplicates'
}
// Start at prepare_recalibration; uses the same "mapped_single" samplesheets as the markduplicates profiles.
prepare_recalibration_bam {
params.input = "${baseDir}/tests/csv/3.0/mapped_single_bam.csv"
params.step = 'prepare_recalibration'
}
prepare_recalibration_cram {
params.input = "${baseDir}/tests/csv/3.0/mapped_single_cram.csv"
params.step = 'prepare_recalibration'
}
// Start at recalibrate; uses the "prepare_recalibration_single" samplesheets.
recalibrate_bam {
params.input = "${baseDir}/tests/csv/3.0/prepare_recalibration_single_bam.csv"
params.step = 'recalibrate'
}
recalibrate_cram {
params.input = "${baseDir}/tests/csv/3.0/prepare_recalibration_single_cram.csv"
params.step = 'recalibrate'
}
save_bam_mapped {
params.save_bam_mapped = true
}
skip_bqsr {
params.skip_tools = "baserecalibrator"
}
skip_markduplicates {
params.skip_tools = "markduplicates"
}
Expand All @@ -70,8 +93,9 @@ profiles {
params.save_split_fastqs = true
}
targeted {
params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/multi_intervals.bed"
params.wes = true
params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.multi_intervals.bed"
params.wes = true
params.nucleotides_per_second = 20
}
tools {
params.input = "${baseDir}/tests/csv/3.0/recalibrated.csv"
Expand All @@ -85,7 +109,6 @@ profiles {
params.wes = true
params.genome = 'WBcel235'
params.vep_genome = 'WBcel235'
//params.vep_cache =
}
tools_germline {
params.input = "${baseDir}/tests/csv/3.0/recalibrated_germline.csv"
Expand Down
Binary file modified docs/images/sarek_subway.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit b76d09d

Please sign in to comment.