Skip to content

Commit

Permalink
Merge pull request #180 from MaxUlysse/Bee
Browse files Browse the repository at this point in the history
improve minimal usage of Sarek
  • Loading branch information
maxulysse authored Apr 15, 2020
2 parents e1b8796 + 752b73e commit 6fc455a
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 12 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Piellorieppe is one of the main massifs in the Sarek National Park.
- [#175](https://github.com/nf-core/sarek/pull/175) - Add `Sentieon` documentation
- [#176](https://github.com/nf-core/sarek/pull/176) - Add empty `custom` genome in `genomes.config` to allow genomes that are not in `AWS iGenomes`
- [#179](https://github.com/nf-core/sarek/pull/179) - Add `FreeBayes` germline variant calling
- [#180](https://github.com/nf-core/sarek/pull/180) - Now saving Mapped BAMs (and creating TSV) in minimal setting

### Changed - [2.6dev]

Expand Down Expand Up @@ -53,6 +54,7 @@ Piellorieppe is one of the main massifs in the Sarek National Park.
- [#143](https://github.com/nf-core/sarek/pull/143) - Revert `snpEff` cache version to `86` for `GRCh38`
- [#152](https://github.com/nf-core/sarek/pull/152), [#158](https://github.com/nf-core/sarek/pull/158), [#164](https://github.com/nf-core/sarek/pull/164), [#174](https://github.com/nf-core/sarek/pull/174) - Update docs
- [#164](https://github.com/nf-core/sarek/pull/164) - Update `gatk4-spark` from `4.1.4.1` to `4.1.6.0`
- [#180](https://github.com/nf-core/sarek/pull/180) - Improve minimal setting

### Fixed - [2.6dev]

Expand Down
48 changes: 36 additions & 12 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -1179,24 +1179,24 @@ process MergeBamMapped {
set idPatient, idSample, idRun, file(bam) from multipleBam

output:
set idPatient, idSample, file("${idSample}.bam") into mergedBam
set idPatient, idSample, file("${idSample}.bam") into bam_mapped_merged

script:
"""
samtools merge --threads ${task.cpus} ${idSample}.bam ${bam}
"""
}

mergedBam = mergedBam.dump(tag:'Merged BAM')
bam_mapped_merged = bam_mapped_merged.dump(tag:'Merged BAM')

mergedBam = mergedBam.mix(singleBam,singleBamSentieon)
bam_mapped_merged = bam_mapped_merged.mix(singleBam,singleBamSentieon)

(mergedBam, mergedBamForSentieon) = mergedBam.into(2)
(bam_mapped_merged, mergedBamForSentieon) = bam_mapped_merged.into(2)

if (!params.sentieon) mergedBamForSentieon.close()
else mergedBam.close()
else bam_mapped_merged.close()

mergedBam = mergedBam.dump(tag:'BAMs for MD')
bam_mapped_merged = bam_mapped_merged.dump(tag:'BAMs for MD')
mergedBamForSentieon = mergedBamForSentieon.dump(tag:'Sentieon BAMs to Index')

process IndexBamMergedForSentieon {
Expand All @@ -1216,18 +1216,21 @@ process IndexBamMergedForSentieon {
"""
}

(mergedBam, mergedBamToIndex) = mergedBam.into(2)
(bam_mapped_merged, bam_mapped_merged_to_index) = bam_mapped_merged.into(2)

process IndexBamFile {
label 'cpus_8'

tag {idPatient + "-" + idSample}

publishDir "${params.outdir}/Preprocessing/${idSample}/Mapped/${it}", mode: params.publish_dir_mode

input:
set idPatient, idSample, file(bam) from mergedBamToIndex
set idPatient, idSample, file(bam) from bam_mapped_merged_to_index

output:
set idPatient, idSample, file(bam), file("*.bai") into indexedBam
set idPatient, idSample, file(bam), file("*.bai") into bam_mapped_merged_indexed
set idPatient, idSample into tsv_bam_indexed

when: !(params.known_indels)

Expand All @@ -1238,6 +1241,27 @@ process IndexBamFile {
"""
}

(tsv_bam_indexed, tsv_bam_indexed_sample) = tsv_bam_indexed.into(2)

// Creating a TSV file to restart from this step
tsv_bam_indexed.map { idPatient, idSample ->
gender = genderMap[idPatient]
status = statusMap[idPatient, idSample]
bam = "${params.outdir}/Preprocessing/${idSample}/Mapped/${idSample}.bam"
bai = "${params.outdir}/Preprocessing/${idSample}/Mapped/${idSample}.bam.bai"
"${idPatient}\t${gender}\t${status}\t${idSample}\t${bam}\t${bai}\n"
}.collectFile(
name: 'mapped.tsv', sort: true, storeDir: "${params.outdir}/Preprocessing/TSV"
)

tsv_bam_indexed_sample
.collectFile(storeDir: "${params.outdir}/Preprocessing/TSV") { idPatient, idSample ->
status = statusMap[idPatient, idSample]
gender = genderMap[idPatient]
bam = "${params.outdir}/Preprocessing/${idSample}/Mapped/${idSample}.bam"
bai = "${params.outdir}/Preprocessing/${idSample}/Mapped/${idSample}.bam.bai"
["mapped_${idSample}.tsv", "${idPatient}\t${gender}\t${status}\t${idSample}\t${bam}\t${bai}\n"]
}
// STEP 2: MARKING DUPLICATES

process MarkDuplicates {
Expand All @@ -1252,7 +1276,7 @@ process MarkDuplicates {
}

input:
set idPatient, idSample, file("${idSample}.bam") from mergedBam
set idPatient, idSample, file("${idSample}.bam") from bam_mapped_merged

output:
set idPatient, idSample, file("${idSample}.md.bam"), file("${idSample}.md.bam.bai") into duplicateMarkedBams
Expand Down Expand Up @@ -1750,8 +1774,8 @@ bamQCReport = bamQCReport.dump(tag:'BamQC')
// When using sentieon for mapping, Channel bamRecal is bamRecalSentieon
if (params.sentieon && step == 'mapping') bamRecal = bamRecalSentieon

// When no knownIndels for mapping, Channel bamRecal is indexedBam
bamRecal = (params.known_indels && step == 'mapping') ? bamRecal : indexedBam
// When no knownIndels for mapping, Channel bamRecal is bam_mapped_merged_indexed
bamRecal = (params.known_indels && step == 'mapping') ? bamRecal : bam_mapped_merged_indexed

// When starting with variant calling, Channel bamRecal is inputSample
if (step == 'variantcalling') bamRecal = inputSample
Expand Down

0 comments on commit 6fc455a

Please sign in to comment.