Skip to content

Commit

Permalink
Merge pull request #180 from MaxUlysse/Bee
Browse files Browse the repository at this point in the history
improve minimal usage of Sarek
  • Loading branch information
maxulysse authored Apr 15, 2020
2 parents e1b8796 + 752b73e commit 6fc455a
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 12 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Piellorieppe is one of the main massifs in the Sarek National Park.
- [#175](https://github.com/nf-core/sarek/pull/175) - Add `Sentieon` documentation
- [#176](https://github.com/nf-core/sarek/pull/176) - Add empty `custom` genome in `genomes.config` to allow genomes that are not in `AWS iGenomes`
- [#179](https://github.com/nf-core/sarek/pull/179) - Add `FreeBayes` germline variant calling
- [#180](https://github.com/nf-core/sarek/pull/180) - Now saving Mapped BAMs (and creating TSV) in minimal setting

### Changed - [2.6dev]

Expand Down Expand Up @@ -53,6 +54,7 @@ Piellorieppe is one of the main massifs in the Sarek National Park.
- [#143](https://github.com/nf-core/sarek/pull/143) - Revert `snpEff` cache version to `86` for `GRCh38`
- [#152](https://github.com/nf-core/sarek/pull/152), [#158](https://github.com/nf-core/sarek/pull/158), [#164](https://github.com/nf-core/sarek/pull/164), [#174](https://github.com/nf-core/sarek/pull/174) - Update docs
- [#164](https://github.com/nf-core/sarek/pull/164) - Update `gatk4-spark` from `4.1.4.1` to `4.1.6.0`
- [#180](https://github.com/nf-core/sarek/pull/180) - Improve minimal setting

### Fixed - [2.6dev]

Expand Down
48 changes: 36 additions & 12 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -1179,24 +1179,24 @@ process MergeBamMapped {
set idPatient, idSample, idRun, file(bam) from multipleBam

output:
set idPatient, idSample, file("${idSample}.bam") into mergedBam
set idPatient, idSample, file("${idSample}.bam") into bam_mapped_merged

script:
"""
samtools merge --threads ${task.cpus} ${idSample}.bam ${bam}
"""
}

mergedBam = mergedBam.dump(tag:'Merged BAM')
bam_mapped_merged = bam_mapped_merged.dump(tag:'Merged BAM')

mergedBam = mergedBam.mix(singleBam,singleBamSentieon)
bam_mapped_merged = bam_mapped_merged.mix(singleBam,singleBamSentieon)

(mergedBam, mergedBamForSentieon) = mergedBam.into(2)
(bam_mapped_merged, mergedBamForSentieon) = bam_mapped_merged.into(2)

if (!params.sentieon) mergedBamForSentieon.close()
else mergedBam.close()
else bam_mapped_merged.close()

mergedBam = mergedBam.dump(tag:'BAMs for MD')
bam_mapped_merged = bam_mapped_merged.dump(tag:'BAMs for MD')
mergedBamForSentieon = mergedBamForSentieon.dump(tag:'Sentieon BAMs to Index')

process IndexBamMergedForSentieon {
Expand All @@ -1216,18 +1216,21 @@ process IndexBamMergedForSentieon {
"""
}

(mergedBam, mergedBamToIndex) = mergedBam.into(2)
(bam_mapped_merged, bam_mapped_merged_to_index) = bam_mapped_merged.into(2)

process IndexBamFile {
label 'cpus_8'

tag {idPatient + "-" + idSample}

publishDir "${params.outdir}/Preprocessing/${idSample}/Mapped/${it}", mode: params.publish_dir_mode

input:
set idPatient, idSample, file(bam) from mergedBamToIndex
set idPatient, idSample, file(bam) from bam_mapped_merged_to_index

output:
set idPatient, idSample, file(bam), file("*.bai") into indexedBam
set idPatient, idSample, file(bam), file("*.bai") into bam_mapped_merged_indexed
set idPatient, idSample into tsv_bam_indexed

when: !(params.known_indels)

Expand All @@ -1238,6 +1241,27 @@ process IndexBamFile {
"""
}

(tsv_bam_indexed, tsv_bam_indexed_sample) = tsv_bam_indexed.into(2)

// Creating a TSV file to restart from this step
tsv_bam_indexed.map { idPatient, idSample ->
gender = genderMap[idPatient]
status = statusMap[idPatient, idSample]
bam = "${params.outdir}/Preprocessing/${idSample}/Mapped/${idSample}.bam"
bai = "${params.outdir}/Preprocessing/${idSample}/Mapped/${idSample}.bam.bai"
"${idPatient}\t${gender}\t${status}\t${idSample}\t${bam}\t${bai}\n"
}.collectFile(
name: 'mapped.tsv', sort: true, storeDir: "${params.outdir}/Preprocessing/TSV"
)

tsv_bam_indexed_sample
.collectFile(storeDir: "${params.outdir}/Preprocessing/TSV") { idPatient, idSample ->
status = statusMap[idPatient, idSample]
gender = genderMap[idPatient]
bam = "${params.outdir}/Preprocessing/${idSample}/Mapped/${idSample}.bam"
bai = "${params.outdir}/Preprocessing/${idSample}/Mapped/${idSample}.bam.bai"
["mapped_${idSample}.tsv", "${idPatient}\t${gender}\t${status}\t${idSample}\t${bam}\t${bai}\n"]
}
// STEP 2: MARKING DUPLICATES

process MarkDuplicates {
Expand All @@ -1252,7 +1276,7 @@ process MarkDuplicates {
}

input:
set idPatient, idSample, file("${idSample}.bam") from mergedBam
set idPatient, idSample, file("${idSample}.bam") from bam_mapped_merged

output:
set idPatient, idSample, file("${idSample}.md.bam"), file("${idSample}.md.bam.bai") into duplicateMarkedBams
Expand Down Expand Up @@ -1750,8 +1774,8 @@ bamQCReport = bamQCReport.dump(tag:'BamQC')
// When using sentieon for mapping, Channel bamRecal is bamRecalSentieon
if (params.sentieon && step == 'mapping') bamRecal = bamRecalSentieon

// When no knownIndels for mapping, Channel bamRecal is indexedBam
bamRecal = (params.known_indels && step == 'mapping') ? bamRecal : indexedBam
// When no knownIndels for mapping, Channel bamRecal is bam_mapped_merged_indexed
bamRecal = (params.known_indels && step == 'mapping') ? bamRecal : bam_mapped_merged_indexed

// When starting with variant calling, Channel bamRecal is inputSample
if (step == 'variantcalling') bamRecal = inputSample
Expand Down

0 comments on commit 6fc455a

Please sign in to comment.