From 5a439a96dad961496f84dba555147941091bb041 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Mon, 11 Dec 2023 09:51:34 -0500
Subject: [PATCH 01/58] fix: remove wgs-seek driver script; strip trailing whitespace in workflows.nf

---
 subworkflows/local/workflows.nf | 265 ++++++++++++++++----------------
 wgs-seek                        | 152 ------------------
 2 files changed, 131 insertions(+), 286 deletions(-)
 delete mode 100755 wgs-seek

diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf
index 436d677..96713b5 100644
--- a/subworkflows/local/workflows.nf
+++ b/subworkflows/local/workflows.nf
@@ -1,4 +1,4 @@
-//All Worksflows in One Place         
+//All Worksflows in One Place
 intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file')
 
 
@@ -9,38 +9,38 @@ include {fc_lane; fastq_screen;kraken;qualimap_bamqc;fastqc;
     somalier_extract;somalier_analysis_human;somalier_analysis_mouse;
     multiqc} from  '../../modules/local/qc.nf'
 
-include {fastp; bwamem2; //indelrealign; 
+include {fastp; bwamem2; //indelrealign;
     bqsr; gatherbqsr; applybqsr; samtoolsindex} from  '../../modules/local/trim_align.nf'
 
-include {deepvariant_step1;deepvariant_step2;deepvariant_step3;
+include {deepvariant_step1; deepvariant_step2; deepvariant_step3;
     deepvariant_combined;glnexus} from '../../modules/local/germline.nf'
 
-include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n; 
+include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n;
     contamination_paired; learnreadorientationmodel;mergemut2stats;
-    strelka_tn; combineVariants_strelka; 
+    strelka_tn; combineVariants_strelka;
     varscan_tn; vardict_tn; lofreq_tn; muse_tn;
     octopus_tn; bcftools_index_octopus; bcftools_index_octopus as bcftools_index_octopus_tonly;
-    combineVariants as combineVariants_vardict; combineVariants as combineVariants_vardict_tonly; 
+    combineVariants as combineVariants_vardict; combineVariants as combineVariants_vardict_tonly;
     combineVariants as combineVariants_varscan; combineVariants as combineVariants_varscan_tonly;
     combineVariants_alternative as combineVariants_lofreq; combineVariants as combineVariants_muse;
     combineVariants_alternative as combineVariants_octopus; combineVariants_alternative as combineVariants_octopus_tonly;
-    annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; 
+    annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka;
     annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict; annotvep_tn as annotvep_tn_octopus;
     annotvep_tn as annotvep_tn_lofreq; annotvep_tn as annotvep_tn_muse;
     annotvep_tn as annotvep_tn_combined;
     combinemafs_tn; somaticcombine} from '../../modules/local/variant_calling.nf'
 
-include {mutect2_t_tonly; mutect2filter_tonly; 
+include {mutect2_t_tonly; mutect2filter_tonly;
     varscan_tonly; vardict_tonly; octopus_tonly;
     contamination_tumoronly;
-    learnreadorientationmodel_tonly; 
+    learnreadorientationmodel_tonly;
     mergemut2stats_tonly;
-    annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; 
+    annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict;
     annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus;
     annotvep_tonly as annotvep_tonly_combined;
     combinemafs_tonly;somaticcombine_tonly} from '../../modules/local/variant_calling_tonly.nf'
 
-include {svaba_somatic; manta_somatic; 
+include {svaba_somatic; manta_somatic;
     survivor_sv; gunzip;
     annotsv_tn as annotsv_survivor_tn
     annotsv_tn as annotsv_svaba;annotsv_tn as annotsv_manta} from '../../modules/local/structural_variant.nf'
@@ -53,17 +53,17 @@ include {splitinterval} from '../../modules/local/splitbed.nf'
 
 
 workflow INPUT {
-    
+
     if(params.fastq_input){
         fastqinput=Channel.fromFilePairs(params.fastq_input)
     }else if(params.file_input) {
         fastqinput=Channel.fromPath(params.file_input)
                         .splitCsv(header: false, sep: "\t", strip:true)
-                        .map{ sample,fq1,fq2 -> 
+                        .map{ sample,fq1,fq2 ->
                         tuple(sample, tuple(file(fq1),file(fq2)))
                                   }
     }
-    
+
     if(params.sample_sheet){
         sample_sheet=Channel.fromPath(params.sample_sheet, checkIfExists: true).view()
                        .ifEmpty { "sample sheet not found" }
@@ -73,10 +73,9 @@ workflow INPUT {
                         row.Normal
                        )
                                   }
-    }else{
+    } else {
         sample_sheet=fastqinput.map{samplename,f1 -> tuple (
              samplename)}
-        
     }
 
     emit:
@@ -89,28 +88,28 @@ workflow ALIGN {
     take:
         fastqinput
         sample_sheet
-    main: 
+    main:
     fastp(fastqinput)
     splitinterval(intervalbedin)
-    
+
     bwamem2(fastp.out)
 
     //indelrealign(bwamem2.out)
-
     //indelbambyinterval=indelrealign.out.combine(splitinterval.out.flatten())
+
     bqsrbambyinterval=bwamem2.out.combine(splitinterval.out.flatten())
     bambyinterval=bwamem2.out.combine(splitinterval.out.flatten())
-    
-        
+
+
     bqsr(bqsrbambyinterval)
     bqsrs=bqsr.out.groupTuple()
-        .map { samplename,beds -> tuple( samplename, 
+        .map { samplename,beds -> tuple( samplename,
         beds.toSorted{ it -> (it.name =~ /${samplename}_(.*?).recal_data.grp/)[0][1].toInteger() } )
         }
     gatherbqsr(bqsrs)
 
     tobqsr=bwamem2.out.combine(gatherbqsr.out,by:0)
-    applybqsr(tobqsr) 
+    applybqsr(tobqsr)
 
     //sample_sheet.view()
     bamwithsample=applybqsr.out.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(applybqsr.out,by:0).map{it.swap(3,0)}
@@ -132,40 +131,40 @@ workflow GL {
     take:
         bambyinterval
     main:
-    deepvariant_step1(bambyinterval) 
+    deepvariant_step1(bambyinterval)
     deepvariant_1_sorted=deepvariant_step1.out.groupTuple()
-        .map { samplename,tfbeds,gvcfbed -> tuple( samplename, 
+        .map { samplename,tfbeds,gvcfbed -> tuple( samplename,
         tfbeds.toSorted{ it -> (it.name =~ /${samplename}.tfrecord_(.*?).bed.gz/)[0][1].toInteger() } ,
         gvcfbed.toSorted{ it -> (it.name =~ /${samplename}.gvcf.tfrecord_(.*?).bed.gz/)[0][1].toInteger() } )
         }
-    deepvariant_step2(deepvariant_1_sorted) | deepvariant_step3 
-    glin=deepvariant_step3.out.map{samplename,vcf,vcf_tbi,gvcf,gvcf_tbi -> gvcf}.collect()   
+    deepvariant_step2(deepvariant_1_sorted) | deepvariant_step3
+    glin=deepvariant_step3.out.map{samplename,vcf,vcf_tbi,gvcf,gvcf_tbi -> gvcf}.collect()
 
     glnexus(glin)
     emit:
         glnexusout=glnexus.out
         bcfout=deepvariant_step3.out
-    
+
 }
-    
+
 workflow VC {
     take:
     //Input is the BAMby interval
         bamwithsample
         splitout
         sample_sheet
-        
-    main: 
+
+    main:
     //Create Pairing for TN (in case of dups)
     sample_sheet_paired=sample_sheet|map{tu,no -> tuple ("${tu}_vs_${no}",tu, no)}
 
     bambyinterval=bamwithsample.combine(splitout.flatten())
 
-    //Paired Mutect2    
+    //Paired Mutect2
     mutect2(bambyinterval)
     pileup_paired_t(bambyinterval)
     pileup_paired_n(bambyinterval)
-    
+
     pileup_paired_tout=pileup_paired_t.out.groupTuple()
     .map{samplename,pileups-> tuple( samplename,
     pileups.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tumor.pileup.table/)[0][1].toInteger() } ,
@@ -177,153 +176,153 @@ workflow VC {
 
 
     pileup_paired_all=pileup_paired_tout.join(pileup_paired_nout)
-    contamination_paired(pileup_paired_all) 
+    contamination_paired(pileup_paired_all)
 
     //Mutect2 TN
     mutect2.out.groupTuple(by:[0,1])
-        | multiMap { tumor,normal,vcfs,f1r2,stats -> 
+        | multiMap { tumor,normal,vcfs,f1r2,stats ->
         mut2out_lor: tuple("${tumor}_vs_${normal}",
                 f1r2.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } )
         mut2out_mstats:  tuple( "${tumor}_vs_${normal}",
                 stats.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).mut2.vcf.gz.stats/)[0][1].toInteger() })
         allmut2tn: tuple( "${tumor}_vs_${normal}",
                 vcfs.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).mut2.vcf.gz/)[0][1].toInteger() } )
-        } 
+        }
     | set{mut2out}
-    
+
     learnreadorientationmodel(mut2out.mut2out_lor)
     mergemut2stats(mut2out.mut2out_mstats)
-   
+
     mutect2_in=mut2out.allmut2tn
         | join(mergemut2stats.out)
-        | join(learnreadorientationmodel.out) 
-        | map{t,vcf,stats,ro -> tuple(t.split('_vs_')[0],t.split('_vs_')[1],vcf,stats,ro)} 
-        | join(contamination_paired.out) 
+        | join(learnreadorientationmodel.out)
+        | map{t,vcf,stats,ro -> tuple(t.split('_vs_')[0],t.split('_vs_')[1],vcf,stats,ro)}
+        | join(contamination_paired.out)
         | mutect2filter
-        | join(sample_sheet_paired) 
-        | map{sample,markedvcf,markedindex,normvcf,normindex,stats,tumor,normal -> tuple(tumor,normal,"mutect2",normvcf,normindex)}  
+        | join(sample_sheet_paired)
+        | map{sample,markedvcf,markedindex,normvcf,normindex,stats,tumor,normal -> tuple(tumor,normal,"mutect2",normvcf,normindex)}
 
     annotvep_tn_mut2(mutect2_in)
 
     //Mutect2 Tumor Only
     bambyinterval_t=bambyinterval.map{tumorname,tumor,tumorbai,normalname,normalbam,normalbai,bed ->tuple(tumorname,tumor,tumorbai,bed)}
-    mutect2_t_tonly(bambyinterval_t)  
-        
+    mutect2_t_tonly(bambyinterval_t)
+
     mutect2_t_tonly.out.groupTuple()
-        | multiMap { tumor,vcfs,f1r2,stats -> 
+        | multiMap { tumor,vcfs,f1r2,stats ->
         mut2tout_lor: tuple(tumor,
                 f1r2.toSorted{ it -> (it.name =~ /${tumor}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } )
         mut2tonly_mstats:  tuple( tumor,
                 stats.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz.stats/)[0][1].toInteger() })
         allmut2tonly: tuple(tumor,
                 vcfs.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz/)[0][1].toInteger() } )
-        } 
+        }
     | set{mut2tonlyout}
 
-    
+
     learnreadorientationmodel_tonly(mut2tonlyout.mut2tout_lor)
     mergemut2stats_tonly(mut2tonlyout.mut2tonly_mstats)
     contamination_tumoronly(pileup_paired_tout)
-    
+
     mutect2_in_tonly=mut2tonlyout.allmut2tonly
         | join(mergemut2stats_tonly.out)
         | join(learnreadorientationmodel_tonly.out)
-        | join(contamination_tumoronly.out) 
+        | join(contamination_tumoronly.out)
     | mutect2filter_tonly
     | join(sample_sheet)
-    | map{tumor,markedvcf,markedindex,normvcf,normindex,stats,normal -> tuple(tumor,"mutect2_tonly",normvcf,normindex)} 
+    | map{tumor,markedvcf,markedindex,normvcf,normindex,stats,normal -> tuple(tumor,"mutect2_tonly",normvcf,normindex)}
     annotvep_tonly_mut2(mutect2_in_tonly)
-    
-    //Strelka TN 
+
+    //Strelka TN
     strelka_in=strelka_tn(bambyinterval) | groupTuple(by:[0,1])
         | map { tumor,normal,vcfs,vcfindex,indels,indelindex -> tuple("${tumor}_vs_${normal}",
             vcfs.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.snvs.vcf.gz/)[0][1].toInteger() },vcfindex,
-            indels.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.indels.vcf.gz/)[0][1].toInteger() } ,indelindex)} 
+            indels.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.indels.vcf.gz/)[0][1].toInteger() } ,indelindex)}
         | combineVariants_strelka |  join(sample_sheet_paired)
-        | map{sample,markedvcf,markedindex,finalvcf,finalindex,tumor,normal -> tuple(tumor,normal,"strelka",finalvcf,finalindex)} 
+        | map{sample,markedvcf,markedindex,finalvcf,finalindex,tumor,normal -> tuple(tumor,normal,"strelka",finalvcf,finalindex)}
     annotvep_tn_strelka(strelka_in)
 
     //Vardict TN
     vardict_in=vardict_tn(bambyinterval) | groupTuple(by:[0,1])
-        | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).vardict.vcf/)[0][1].toInteger()},"vardict")} 
+        | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).vardict.vcf/)[0][1].toInteger()},"vardict")}
         | combineVariants_vardict | join(sample_sheet_paired)
-        | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"vardict",normvcf,normindex)}  
+        | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"vardict",normvcf,normindex)}
     annotvep_tn_vardict(vardict_in)
 
     //VarDict TOnly
-    vardict_in_tonly=bambyinterval 
+    vardict_in_tonly=bambyinterval
         | map{tumorname,tumorbam,tumorbai,normname,normbam,normbai,bed ->
-            tuple(tumorname,tumorbam,tumorbai,bed)} 
+            tuple(tumorname,tumorbam,tumorbai,bed)}
         | vardict_tonly | groupTuple()
         | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.vardict.vcf/)[0][1].toInteger()},"vardict_tonly")}
         | combineVariants_vardict_tonly | join(sample_sheet)
-        | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"vardict_tonly",normvcf,normindex)} 
+        | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"vardict_tonly",normvcf,normindex)}
     annotvep_tonly_vardict(vardict_in_tonly)
-    
+
     //VarScan TN
-    varscan_in=bambyinterval.combine(contamination_paired.out) 
-        | varscan_tn | groupTuple(by:[0,1]) 
-        | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).varscan.vcf.gz/)[0][1].toInteger()},"varscan")} 
+    varscan_in=bambyinterval.combine(contamination_paired.out)
+        | varscan_tn | groupTuple(by:[0,1])
+        | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).varscan.vcf.gz/)[0][1].toInteger()},"varscan")}
         | combineVariants_varscan | join(sample_sheet_paired)
-        | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"varscan",normvcf,normindex)} 
+        | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"varscan",normvcf,normindex)}
     annotvep_tn_varscan(varscan_in)
-    
+
     //VarScan TOnly
-    varscan_in_tonly=bambyinterval.combine(contamination_paired.out) 
+    varscan_in_tonly=bambyinterval.combine(contamination_paired.out)
     | map{tumor,bam,bai,normal,nbam,nbai,bed,tumorname2,tpile,npile,tumorc,normalc ->
-            tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly  | groupTuple() 
+            tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly  | groupTuple()
     | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf/)[0][1].toInteger()},"varscan_tonly")}
     | combineVariants_varscan_tonly
     | join(sample_sheet)
-    | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"varscan_tonly",normvcf,normindex)} 
+    | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"varscan_tonly",normvcf,normindex)}
     annotvep_tonly_varscan(varscan_in_tonly)
-    
+
     //Lofreq TN
-    lofreq_in=lofreq_tn(bambyinterval) | groupTuple(by:[0,1]) 
-        | map{tu,no,snv,dbsnv,indel,dbindel,vcf,vcfindex-> tuple("${tu}_vs_${no}",vcf.toSorted{it -> (it.name =~ /${tu}_vs_${no}_(.*?)_lofreq.vcf.gz/)[0][1].toInteger()},vcfindex,"lofreq")} 
+    lofreq_in=lofreq_tn(bambyinterval) | groupTuple(by:[0,1])
+        | map{tu,no,snv,dbsnv,indel,dbindel,vcf,vcfindex-> tuple("${tu}_vs_${no}",vcf.toSorted{it -> (it.name =~ /${tu}_vs_${no}_(.*?)_lofreq.vcf.gz/)[0][1].toInteger()},vcfindex,"lofreq")}
         | combineVariants_lofreq | join(sample_sheet_paired)
-        | map{sample,marked,markedindex,normvcf,normindex,tumor,normal->tuple(tumor,normal,"lofreq",normvcf,normindex)} 
+        | map{sample,marked,markedindex,normvcf,normindex,tumor,normal->tuple(tumor,normal,"lofreq",normvcf,normindex)}
     annotvep_tn_lofreq(lofreq_in)
 
     //MuSE TN
-    muse_in=muse_tn(bamwithsample) 
-        | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf,"muse")} 
+    muse_in=muse_tn(bamwithsample)
+        | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf,"muse")}
         | combineVariants_muse | join(sample_sheet_paired)
-        | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"muse",normvcf,normindex)} 
+        | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"muse",normvcf,normindex)}
     annotvep_tn_muse(muse_in)
 
     //Octopus TN
-    octopus_in=octopus_tn(bambyinterval) | bcftools_index_octopus 
-        | groupTuple() 
+    octopus_in=octopus_tn(bambyinterval) | bcftools_index_octopus
+        | groupTuple()
         | map{samplename,vcf,vcfindex-> tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus")}
-        | combineVariants_octopus 
-        | map{samplename,marked,markedindex,normvcf,normindex -> 
+        | combineVariants_octopus
+        | map{samplename,marked,markedindex,normvcf,normindex ->
             tuple(samplename.split('_vs_')[0],samplename.split('_vs_')[1],"octopus",normvcf,normindex)}
-    annotvep_tn_octopus(octopus_in) 
+    annotvep_tn_octopus(octopus_in)
 
     //Octopus TOnly
     octopus_in_tonly=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed->
-    tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus_tonly 
-    | groupTuple() 
+    tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus_tonly
+    | groupTuple()
         | map{samplename,vcf,vcfindex->tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).tonly.octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus_tonly")}
-        | combineVariants_octopus_tonly 
+        | combineVariants_octopus_tonly
         | join(sample_sheet) |
-        map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"octopus_tonly",normvcf,normindex)} 
+        map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"octopus_tonly",normvcf,normindex)}
     annotvep_tonly_octopus(octopus_in_tonly)
 
     //Combine All Variants Using VCF and Then Reannotate
     mutect2_in|concat(strelka_in)|concat(octopus_in)|concat(muse_in)|concat(lofreq_in)
         | concat(vardict_in) |concat(varscan_in) | groupTuple(by:[0,1])
-        | somaticcombine 
-        | map{tumor,normal,vcf,index ->tuple(tumor,normal,"combined",vcf,index)} 
+        | somaticcombine
+        | map{tumor,normal,vcf,index ->tuple(tumor,normal,"combined",vcf,index)}
         | annotvep_tn_combined
 
     mutect2_in_tonly|concat(octopus_in_tonly)
-        | concat(vardict_in_tonly)|concat(varscan_in_tonly) | groupTuple() 
-        | somaticcombine_tonly 
-        | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} 
+        | concat(vardict_in_tonly)|concat(varscan_in_tonly) | groupTuple()
+        | somaticcombine_tonly
+        | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)}
         | annotvep_tonly_combined
-    
+
     //Implement PCGR Annotator/CivIC Next
 
     emit:
@@ -336,18 +335,18 @@ workflow VC {
 workflow SV {
     take:
         bamwithsample
-        
-    main: 
+
+    main:
         //Svaba
         svaba_out=svaba_somatic(bamwithsample)
         .map{ tumor,bps,contigs,discord,alignents,gindel,gsv,so_indel,so_sv,unfil_gindel,unfil_gsv,unfil_so_indel,unfil_sv,log ->
-            tuple(tumor,so_sv,"svaba")} 
+            tuple(tumor,so_sv,"svaba")}
         annotsv_svaba(svaba_out).ifEmpty("Empty SV input--No SV annotated")
 
         //Manta
         manta_out=manta_somatic(bamwithsample)
-            .map{tumor,gsv,so_sv,unfil_sv,unfil_indel -> 
-            tuple(tumor,so_sv,"manta")} 
+            .map{tumor,gsv,so_sv,unfil_sv,unfil_indel ->
+            tuple(tumor,so_sv,"manta")}
         annotsv_manta(manta_out).ifEmpty("Empty SV input--No SV annotated")
 
         //Delly-WIP
@@ -361,20 +360,20 @@ workflow SV {
 workflow CNVmouse {
     take:
         bamwithsample
-        
-    main: 
+
+    main:
         //Sequenza (Preferred for Paired)
         chrs=Channel.fromList(params.genomes[params.genome].chromosomes)
-        seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> 
+        seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai->
             tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)}
         seqzin.combine(chrs) | seqz_sequenza_bychr
-        seqz_sequenza_bychr.out.groupTuple()   
+        seqz_sequenza_bychr.out.groupTuple()
             .map{pair, seqz -> tuple(pair, seqz.sort{it.name})}
-            | sequenza 
-        
+            | sequenza
+
         //FREEC Paired Mode
         bamwithsample | freec_paired
-        
+
 }
 
 workflow CNVhuman {
@@ -382,25 +381,25 @@ workflow CNVhuman {
         bamwithsample
         somaticcall_input
 
-    main: 
+    main:
         //Sequenza
         chrs=Channel.fromList(params.genomes[params.genome].chromosomes)
-        seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> 
+        seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai->
             tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)}
         seqzin.combine(chrs) | seqz_sequenza_bychr
-        seqz_sequenza_bychr.out.groupTuple()   
+        seqz_sequenza_bychr.out.groupTuple()
             .map{pair, seqz -> tuple(pair, seqz.sort{it.name})}
-            | sequenza 
+            | sequenza
 
         //Purple
         bamwithsample | amber_tn
         bamwithsample | cobalt_tn
         purplein=amber_tn.out.join(cobalt_tn.out)
-        purplein.join(somaticcall_input)| 
-        map{t1,amber,cobalt,n1,vc,vcf,vcfindex -> tuple(t1,amber,cobalt,vcf,vcfindex)}  
+        purplein.join(somaticcall_input)|
+        map{t1,amber,cobalt,n1,vc,vcf,vcfindex -> tuple(t1,amber,cobalt,vcf,vcfindex)}
             | purple
-        
-}         
+
+}
 
   /*
     //baminput=sample_sheet
@@ -409,7 +408,7 @@ workflow CNVhuman {
     //somaticinput=sample_sheet
      //      .map{samplename,bam,vcf-> tuple(samplename,file(vcf))}
 
- 
+
 
     */
 
@@ -430,14 +429,14 @@ workflow QC_NOGL {
     qualimap_bamqc(applybqsr)
     samtools_flagstats(applybqsr)
     fastqc(applybqsr)
-   
+
     //Somalier
-    somalier_extract(applybqsr) 
+    somalier_extract(applybqsr)
     som_in=somalier_extract.out.collect()
 
     //Prep for MultiQC input
     fclane_out=fc_lane.out.map{samplename,info->info}.collect()
-    fqs_out=fastq_screen.out.collect() 
+    fqs_out=fastq_screen.out.collect()
 
     kraken_out=kraken.out.map{samplename,taxa,krona -> tuple(taxa,krona)}.collect()
     qualimap_out=qualimap_bamqc.out.map{genome,rep->tuple(genome,rep)}.collect()
@@ -445,11 +444,11 @@ workflow QC_NOGL {
 
     samtools_flagstats_out=samtools_flagstats.out.collect()
 
-    if(params.genome=="hg38"){ 
+    if(params.genome=="hg38"){
         somalier_analysis_human(som_in)
         somalier_analysis_out=somalier_analysis_human.out.collect()
     }
-    else if(params.genome=="mm10"){ 
+    else if(params.genome=="mm10"){
         somalier_analysis_mouse(som_in)
         somalier_analysis_out=somalier_analysis_mouse.out.collect()
     }
@@ -466,8 +465,8 @@ workflow QC_GL {
         fastqin
         fastpout
         applybqsr
-        glnexusout 
-        bcfout 
+        glnexusout
+        bcfout
 
     main:
     //QC Steps
@@ -487,23 +486,23 @@ workflow QC_GL {
     gatk_varianteval(bcfin)
     snpeff(bcfin)
     //Somalier
-    somalier_extract(applybqsr) 
+    somalier_extract(applybqsr)
     som_in=somalier_extract.out.collect()
 
 
 
     //Prep for MultiQC input
-    if(params.genome=="hg38"){ 
+    if(params.genome=="hg38"){
         somalier_analysis_human(som_in)
         somalier_analysis_out=somalier_analysis_human.out.collect()
     }
-    else if(params.genome=="mm10"){ 
+    else if(params.genome=="mm10"){
         somalier_analysis_mouse(som_in)
         somalier_analysis_out=somalier_analysis_mouse.out.collect()
     }
 
     fclane_out=fc_lane.out.map{samplename,info->info}.collect()
-    fqs_out=fastq_screen.out.collect() 
+    fqs_out=fastq_screen.out.collect()
 
     kraken_out=kraken.out.map{samplename,taxa,krona -> tuple(taxa,krona)}.collect()
     qualimap_out=qualimap_bamqc.out.map{genome,rep->tuple(genome,rep)}.collect()
@@ -523,7 +522,7 @@ workflow QC_GL {
 
 //Variant Calling from BAM only
 workflow INPUT_BAM {
-    
+
    if(params.sample_sheet){
         sample_sheet=Channel.fromPath(params.sample_sheet, checkIfExists: true)
                        .ifEmpty { "sample sheet not found" }
@@ -533,9 +532,9 @@ workflow INPUT_BAM {
                         row.Normal
                        )
                                   }
-    } 
-    
-    //Either BAM Input or File sheet input 
+    }
+
+    //Either BAM Input or File sheet input
     if(params.bam_input){
         //Check if Index is .bai or .bam.bai
         bambai=params.bam_input +".bai"
@@ -553,25 +552,23 @@ workflow INPUT_BAM {
            .map{it-> tuple(it.simpleName,it)}
            .join(bai)
         }
-        
+
     }else if(params.file_input) {
         baminputonly=Channel.fromPath(params.file_input)
                         .splitCsv(header: false, sep: "\t", strip:true)
-                        .map{ sample,bam,bai  -> 
+                        .map{ sample,bam,bai  ->
                         tuple(sample, file(bam),file(bai))
                                   }
     }
 
-    
+
     splitinterval(intervalbedin)
-    
+
     bamwithsample=baminputonly.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(baminputonly,by:0).map{it.swap(3,0)}
-    
+
     emit:
         bamwithsample
         splitout=splitinterval.out
         sample_sheet
 
 }
-
-
diff --git a/wgs-seek b/wgs-seek
deleted file mode 100755
index 14794db..0000000
--- a/wgs-seek
+++ /dev/null
@@ -1,152 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: UTF-8 -*-
-
-"""
-ABOUT: This is the main entry for the LOGAN (whole genome sequencing pipeline).
-REQUIRES:
-  - python>=3.5
-  - nextflow  
-  - singularity
-DISCLAIMER:
-                    PUBLIC DOMAIN NOTICE
-        CCR Collaborative Bioinformatics Resource (CCBR)
-                National Cancer Institute (NCI)
-This software/database is a "United  States Government Work" under
-the terms of the United  States Copyright Act.  It was written as 
-part of the author's official duties as a United States Government
-employee and thus cannot be copyrighted. This software is freely
-available to the public for use.
-Although all  reasonable  efforts have been taken  to ensure  the
-accuracy and reliability of the software and data, CCBR do not and
-cannot warrant the performance or results that may  be obtained by 
-using this software or data. CCBR and NCI disclaim all warranties,
-express  or  implied,  including   warranties   of   performance, 
-merchantability or fitness for any particular purpose.
-Please cite the author and the "NIH Biowulf Cluster" in any work or
-product based on this material.
-
-
-    PIPELINE TYPE
-    Align --PIPE_ALIGN-TRIM ALIGN
-    Variant Calls--PIPE_VC-Variant calling step after align
-    Germline Calls DV--PIPE_GERMLINE-Germline after align
-    QC requires Alignment, Germline--PIPE_QC--After everything
-    --PIPE_BAMVC-BAM variant calling only
-    --PIPE_TONLY_TRIM-Trim and Align
-    --PIPE_TONLY_TRIM-Trim and Align
-"""
-
-import argparse, os, time, sys, subprocess, re, json
-
-def parse_args():
-    parser = argparse.ArgumentParser(description='Input files')
-    parser.add_argument('--fastq',help='FQ Inputs')
-    parser.add_argument('--filelist',help="Files input")
-    parser.add_argument('--bam',help="Files input")
-    parser.add_argument('--mode',help='Mode?')
-    parser.add_argument('--paired',help='Paired',action="store_true")
-    parser.add_argument('--splitregions',default=24,help="How splits per regions")
-    parser.add_argument('--sv',help="Add Structural VC calling",action="store_true")
-    parser.add_argument('--output',help="Output Directory")
-    parser.add_argument('--sample_sheet',help="Sample sheet")
-    parser.add_argument('--profile',help="Biowulf or Local Run")
-    parser.add_argument('--resume',action="store_true",default="True",help="Resume previous run?")
-    parser.add_argument('--submit',action="store_true",help="Submit to SLURM?")
-    parser.add_argument('--stub',action="store_true",help="Stub run")
-    args = parser.parse_args()
-    return(args)
-
-
-    
-def main():
-    args=parse_args()
-    dirname = os.path.dirname(os.path.realpath(__file__))
-    outdirname = os.path.basename(os.getcwd())
-    c1="#!/usr/bin/bash"
-    c2="module load nextflow"
-    c3="module load singularity"
-    #Paired Mode-> either align/VC/germline with FASTQ
-    if args.paired and args.sample_sheet:
-        sample_path="--sample_sheet '"+args.sample_sheet+"'"
-        if args.mode=="align":
-            mode="--PIPE_ALIGN"
-        elif args.mode=="vc" and args.sv:
-            mode="--PIPE_SV"
-        elif args.mode=="vc":
-            mode="--PIPE_VC"
-        elif args.mode=="germline":
-            mode="--PIPE_GERMLINE"
-        elif args.mode=="qc":
-            mode="--PIPE_QC"
-        if args.fastq:
-            in1="--fastq_input '"+args.fastq+"'"
-        elif args.filelist:
-            in1="--file_input "+args.filelist
-        elif args.bam:
-            in1="--bam "+args.bam
-        else:
-            print("Missing sample sheet for paired mode or you would like Tumro only mode!")
-    else:
-    #Tumor Only- fastq only
-        if args.mode=="align":
-            mode="--PIPE_TONLY_ALIGN"
-            sample_path=""
-            if args.fastq:
-                in1="--fastq_input '"+args.fastq+"'"
-            elif args.filelist:
-                in1="--file_input "+args.filelist
-        if args.mode=="vc":
-            sample_path=""
-            if args.fastq:
-                mode="--PIPE_TONLY_VC"
-                in1="--fastq_input '"+args.fastq+"'"
-            elif args.bam:
-                mode="--PIPE_TONLY_BAMVC"
-                in1="--bam_input '"+args.bam+"'"
-            elif args.filelist:
-                mode="--PIPE_TONLY_BAMVC"
-                in1="--file_input "+args.filelist
-        if args.mode=="qc":
-            sample_path=""
-            if args.fastq:
-                mode="--PIPE_TONLY_QC"
-                in1="--fastq_input '"+args.fastq+"'"
-            elif args.bam:
-                mode="--PIPE_TONLY_QC"
-                in1="--bam_input '"+args.bam+"'"
-    if (args.stub and args.profile is None): 
-        profile="-profile localstub"
-    elif args.profile=="local":
-        profile="-profile local"
-    elif (args.profile=="biowulf" or args.profile is None):
-        profile="-profile biowulf"
-    if args.resume:
-        resume="-resume"
-    else:
-        resume=""
-    c4=["nextflow run",dirname + '/wgs-seek.nf',"-c "+ dirname +"/nextflow.config",
-        in1,profile,resume,sample_path,mode,
-        "--output '" +args.output+"'"+" --split_regions " +str(args.splitregions)]
-    cmd1=' '.join(c4)
-    code=c1+"\n"+c2+"\n"+c3+"\n"+cmd1
-    time1=time.strftime("%Y_%m_%d_%H%M%S")
-    #outswarmmut='wgs_nf_'+time1+'.slurm'
-    outswarmmut=args.output+"_"+time1+'.slurm'
-    with open(outswarmmut, "a") as outfile:
-        outfile.write(code+"\n")
-    sbatch_mut="sbatch --cpus-per-task=2 --mem=16g --time 10-00:00:00 --partition norm --output submit_"+time1+".log --error error_"+time1+".log --mail-type=BEGIN,TIME_LIMIT_90,END "+outswarmmut 
-    if args.stub:
-        cmd2=cmd1+" --split_regions 4 -stub -without-podman T -without-conda -without-docker"
-        print(cmd2)
-        os.system(cmd2)
-    elif args.submit:
-        print(sbatch_mut)
-        os.system(sbatch_mut)
-    else:
-        sbatch_out='run_sbatch'+time1+'.sh'
-        with open(sbatch_out, "a") as outfile:
-            outfile.write(sbatch_mut+"\n")
-        print(sbatch_mut)
-if __name__=="__main__":
-  main()
-

From d3a86f79ce993c652d916b0d8d2e1862814471c9 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Mon, 11 Dec 2023 10:03:27 -0500
Subject: [PATCH 02/58] feat: added temporary driver script

---
 logan | 314 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 314 insertions(+)
 create mode 100755 logan

diff --git a/logan b/logan
new file mode 100755
index 0000000..9a72893
--- /dev/null
+++ b/logan
@@ -0,0 +1,314 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+"""
+ABOUT: This is the main entry for the LOGAN (whole genome sequencing pipeline).
+REQUIRES:
+  - python>=3.5
+  - nextflow
+  - singularity
+DISCLAIMER:
+                    PUBLIC DOMAIN NOTICE
+        CCR Collaborative Bioinformatics Resource (CCBR)
+                National Cancer Institute (NCI)
+This software/database is a "United  States Government Work" under
+the terms of the United  States Copyright Act.  It was written as
+part of the author's official duties as a United States Government
+employee and thus cannot be copyrighted. This software is freely
+available to the public for use.
+Although all  reasonable  efforts have been taken  to ensure  the
+accuracy and reliability of the software and data, CCBR do not and
+cannot warrant the performance or results that may  be obtained by
+using this software or data. CCBR and NCI disclaim all warranties,
+express  or  implied,  including   warranties   of   performance,
+merchantability or fitness for any particular purpose.
+Please cite the author and the "NIH Biowulf Cluster" in any work or
+product based on this material.
+
+
+    PIPELINE TYPE
+    Align --PIPE_ALIGN-TRIM ALIGN
+    Variant Calls--PIPE_VC-Variant calling step after align
+    Germline Calls DV--PIPE_GERMLINE-Germline after align
+    QC requires Alignment, Germline--PIPE_QC--After everything
+    --PIPE_BAMVC-BAM variant calling only
+    --PIPE_TONLY_TRIM-Trim and Align
+    --PIPE_TONLY_TRIM-Trim and Align
+"""
+
+# Python standard library
+import argparse, os, time, sys, subprocess, re, json
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description="Input files")
+    parser.add_argument("--fastq", help="FQ Inputs")
+    parser.add_argument(
+        "--file_input",
+        help="TSV file of all fastq files used for input with 3 Columns Sample Name, Pair1, Pair2",
+    )
+    parser.add_argument("--bam", help="Glob of all the BAM files []")
+    parser.add_argument("--sample_sheet", help="Sample sheet and required for Paired")
+    parser.add_argument("--splitregions", default=24, help="How splits per regions")
+    parser.add_argument("--vc", help="Add Somatic VC calling", action="store_true")
+    parser.add_argument("--cnv", help="Add CNV calling", action="store_true")
+    parser.add_argument(
+        "--sv", help="Add Structural Variant calling", action="store_true"
+    )
+    parser.add_argument("--germline", help="Add Germline VC", action="store_true")
+    parser.add_argument(
+        "--qc",
+        help="Add QC Steps (Requires Germline Calling as well)",
+        action="store_true",
+    )
+    parser.add_argument("--output", help="Output Directory")
+    parser.add_argument("--genome", help="hg38, mm10")
+    parser.add_argument("--profile", help="Biowulf or Local Run")
+    parser.add_argument(
+        "--resume", action="store_true", default="True", help="Resume previous run?"
+    )
+    parser.add_argument("--submit", action="store_true", help="Submit to SLURM?")
+    parser.add_argument("--stub", action="store_true", help="Stub run")
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = parse_args()
+    dirname = os.path.dirname(os.path.realpath(__file__))
+    outdirname = os.path.basename(os.getcwd())
+    c1 = "#!/usr/bin/bash"
+    c2 = "module load nextflow"
+    c3 = "module load singularity"
+    # Paired Mode-> either align/VC/SV/CNV/germline(QC as well) with FASTQ
+    if args.sample_sheet:
+        sample_path = "--sample_sheet '" + args.sample_sheet + "'"
+        ##Input Section
+        if args.fastq:
+            in1 = "--fastq_input '" + args.fastq + "'"
+        elif args.file_input:
+            in1 = "--file_input " + args.file_input
+        elif args.bam:
+            in1 = "--bam_input '" + args.bam + "'"
+            baminput = True
+        else:
+            print(
+                "Missing sample sheet for paired mode or you would like Tumor only mode?"
+            )
+        alignmode = "--PIPE_ALIGN"
+        if args.vc and args.bam:
+            vcmode = "--PIPE_BAMVC"
+        elif args.vc:
+            vcmode = "--PIPE_VC"
+        if args.sv and args.bam:
+            svmode = "--PIPE_BAMSV"
+        elif args.sv:
+            svmode = "--PIPE_SV"
+        if args.cnv and args.bam:
+            cnvmode = "--PIPE_BAMCNV"
+        elif args.cnv:
+            cnvmode = "--PIPE_CNV"
+        if args.germline and args.bam:
+            germmode = "--PIPE_BAMGERMLINE"
+        elif args.germline:
+            germmode = "--PIPE_GERMLINE"
+        if args.qc and args.germline:
+            qcmode = "--PIPE_QC_GL"
+        elif args.qc:
+            qcmode = "--PIPE_QC_NOGL"
+    else:
+        ##SET DEFAULT for Tumor-Only Modes//Tumor Only Mode (No sample sheet)
+        alignmode = "--PIPE_TONLY_ALIGN"
+        qcmode = "--PIPE_TONLY_QC"
+        if (
+            args.file_input and re.search(r".bam", open(args.file_input, "r").read())
+        ) or args.bam:
+            baminput = True
+        sample_path = ""
+        if args.vc:
+            if args.fastq:
+                vcmode = "--PIPE_TONLY_VC"
+                in1 = "--fastq_input '" + args.fastq + "'"
+            elif args.bam:
+                vcmode = "--PIPE_TONLY_BAMVC"
+                in1 = "--bam_input '" + args.bam + "'"
+            elif args.file_input:
+                in1 = "--file_input " + args.file_input
+                bamin = re.search(r".bam", open(args.file_input, "r").read())
+                if bamin:
+                    vcmode = "--PIPE_TONLY_BAMVC"
+                else:
+                    vcmode = "--PIPE_TONLY_VC"
+        if args.sv:
+            if args.fastq:
+                svmode = "--PIPE_TONLY_SV"
+                in1 = "--fastq_input '" + args.fastq + "'"
+            elif args.bam:
+                svmode = "--PIPE_TONLY_BAMSV"
+                in1 = "--bam_input '" + args.bam + "'"
+            elif args.file_input:
+                in1 = "--file_input " + args.file_input
+                bamin = re.search(r".bam", open(args.file_input, "r").read())
+                if bamin:
+                    svmode = "--PIPE_TONLY_BAMSV"
+                else:
+                    svmode = "--PIPE_TONLY_SV"
+        if args.cnv:
+            if args.fastq:
+                cnvmode = "--PIPE_TONLY_CNV"
+                in1 = "--fastq_input '" + args.fastq + "'"
+            elif args.bam:
+                cnvmode = "--PIPE_TONLY_BAMCNV"
+                in1 = "--bam_input '" + args.bam + "'"
+            elif args.file_input:
+                in1 = "--file_input " + args.file_input
+                bamin = re.search(r".bam", open(args.file_input, "r").read())
+                if bamin:
+                    cnvmode = "--PIPE_TONLY_BAMCNV"
+                else:
+                    cnvmode = "--PIPE_TONLY_CNV"
+        if args.qc:
+            if args.fastq:
+                in1 = "--fastq_input '" + args.fastq + "'"
+            elif args.file_input:
+                in1 = "--file_input " + args.file_input
+    if args.stub and args.profile is None:
+        profile = "-profile localstub"
+        splitreg = "4"
+    elif args.profile == "local":
+        profile = "-profile local"
+        splitreg = str(args.splitregions)
+    elif args.profile == "biowulf" or args.profile is None:
+        profile = "-profile biowulf"
+        splitreg = str(args.splitregions)
+    if args.resume:
+        resume = "-resume"
+    else:
+        resume = ""
+    ###COMBINE ALL COMMANDS (PIPE ALIGN)
+    commandbase = [
+        "nextflow run",
+        dirname + "/main.nf",
+        "-c " + dirname + "/nextflow.config",
+        in1,
+        profile,
+        resume,
+        sample_path,
+        "--genome",
+        args.genome,
+        "--output '" + args.output + "'" + " --split_regions " + splitreg,
+    ]
+    ##FINAL COMMANDS
+    if not "baminput" in locals():
+        commandalign = commandbase + [alignmode]
+        cmd1 = " ".join(commandalign)
+    else:
+        cmd1 = ""
+    if args.vc:
+        commandvc = commandbase + [vcmode]
+        cmd2 = " ".join(commandvc)
+    else:
+        cmd2 = ""
+    if args.sv:
+        commandsv = commandbase + [svmode]
+        cmd3 = " ".join(commandsv)
+    else:
+        cmd3 = ""
+    if args.cnv:
+        commandcnv = commandbase + [cnvmode]
+        cmd4 = " ".join(commandcnv)
+    else:
+        cmd4 = ""
+    if args.germline:
+        commandgl = commandbase + [germmode]
+        cmd5 = " ".join(commandgl)
+    else:
+        cmd5 = ""
+    if args.qc:
+        commandqc = commandbase + [qcmode]
+        cmd6 = " ".join(commandqc)
+    else:
+        cmd6 = ""
+    code = (
+        c1
+        + "\n"
+        + c2
+        + "\n"
+        + c3
+        + "\n"
+        + cmd1
+        + "\n"
+        + cmd2
+        + "\n"
+        + cmd3
+        + "\n"
+        + cmd4
+        + "\n"
+        + cmd5
+        + "\n"
+        + cmd6
+    )
+    time1 = time.strftime("%Y_%m_%d_%H%M")
+    stubbase = " -stub -without-podman T -without-conda -without-docker"
+    if args.stub:
+        if not "baminput" in locals():
+            cmd1_stub = cmd1 + stubbase
+        else:
+            cmd1_stub = ""
+        if args.vc:
+            cmd2_stub = cmd2 + stubbase
+        else:
+            cmd2_stub = ""
+        if args.sv:
+            cmd3_stub = cmd3 + stubbase
+        else:
+            cmd3_stub = ""
+        if args.cnv:
+            cmd4_stub = cmd4 + stubbase
+        else:
+            cmd4_stub = ""
+        if args.germline:
+            cmd5_stub = cmd5 + stubbase
+        else:
+            cmd5_stub = ""
+        if args.qc:
+            cmd6_stub = cmd6 + stubbase
+        else:
+            cmd6_stub = ""
+        cmd_stub = (
+            cmd1_stub
+            + "\n"
+            + cmd2_stub
+            + "\n"
+            + cmd3_stub
+            + "\n"
+            + cmd4_stub
+            + "\n"
+            + cmd5_stub
+            + "\n"
+            + cmd6_stub
+        )
+        print(cmd_stub)
+        os.system(cmd_stub)
+    else:
+        outswarmmut = args.output + "_" + time1 + ".slurm"
+        with open(outswarmmut, "a") as outfile:
+            outfile.write(code + "\n")
+        sbatch_mut = (
+            "sbatch --cpus-per-task=2 --mem=8g --time 10-00:00:00 --partition norm --output submit_"
+            + time1
+            + ".log --error error_"
+            + time1
+            + ".log --mail-type=BEGIN,END "
+            + outswarmmut
+        )
+        sbatch_out = "kickoff_" + time1 + ".sh"
+        with open(sbatch_out, "a") as outfile:
+            outfile.write(sbatch_mut + "\n")
+        print(sbatch_mut)
+        if args.submit:
+            os.system(sbatch_mut)
+
+
+if __name__ == "__main__":
+    main()

From 6e91623a7735b93ff56627096c0aa99ec97ae427 Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Mon, 11 Dec 2023 14:56:14 -0500
Subject: [PATCH 03/58] refactor: moved genomes to conf folder

---
 conf/genomes.config |  71 +++++++++
 nextflow.config     | 360 ++++++--------------------------------------
 2 files changed, 117 insertions(+), 314 deletions(-)

diff --git a/conf/genomes.config b/conf/genomes.config
index e69de29..a7810a5 100644
--- a/conf/genomes.config
+++ b/conf/genomes.config
@@ -0,0 +1,71 @@
+params {
+    genomes {
+        'hg38' {
+            genome = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta" 
+            genomefai = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta.fai" 
+            bwagenome= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.fasta"
+            genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict"
+            wgsregion = "/data/nousomedr/annotation/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" 
+            intervals="${projectDir}/workflow/resources/hg38_v0_wgs_calling_regions.hg38.bed"
+            //millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
+            //shapeitindel =  "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) //
+            KNOWNINDELS= '/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_indel.vcf'
+            KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz'
+            dbsnp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz"
+            dbsnp_indel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_indel.vcf"
+            gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' // /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz
+            pon = "/data/nousomedr/wgs/updatedpon.vcf.gz"    //pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon} 
+            kgp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/1000G_phase1.snps.high_confidence.hg38.vcf.gz"
+            KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2"
+            snpeff_genome = "GRCh38.86"
+            snpeff_config = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/snpEff/4.3t/snpEff.config"
+            snpeff_bundle = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/snpEff/4.3t/"
+            sites_vcf= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/sites.hg38.vcf.gz"
+            somalier_ancestrydb="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/1kg-somalier"
+            vepcache = "/fdb/VEP/102/cache"
+            vepspecies = "homo_sapiens"
+            vepbuild = "GRCh38"
+            octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest"
+            octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest"
+            SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz"
+            chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM']
+        }    
+
+        'mm10' {
+            genome = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa" // file(params.genome) 
+            genomefai = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa.fai" // file(params.genome) 
+            bwagenome= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwaindex/genome.fa"
+            genomedict= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.dict"
+            intervals="/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/mm10_wgsregions.bed"
+            KNOWNINDELS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz"
+            KNOWNRECAL = "-known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz -known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_snps.vcf.gz"
+            dbsnp = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_allstrains_dbSNP142.vcf.gz"
+            pon = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_dbSNP_allStrains_compSet_noIND.vcf.gz"
+            kgp = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_knownSNPs_sites.vcf.gz"
+            KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2"
+            gnomad= "--germline-resource  /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_allstrains_dbSNP142.vcf.gz"
+            snpeff_genome = "GRCm38.86"
+            snpeff_config = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/snpEff/4.3t/snpEff.config"
+            snpeff_bundle = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/snpEff/4.3t/"
+            sites_vcf = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/somalier/mm10.sites.vcf.gz"
+            //EDIT SOMALIER ANCESTRY AFTER!
+            somalier_ancestrydb="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/1kg-somalier"
+            vepcache = "/fdb/VEP/102/cache"
+            vepspecies = "mus_musculus"
+            vepbuild= "GRCm38"
+            octopus_sforest = ""
+            octopus_gforest = ""
+	 	    SEQUENZAGC =  '/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/SEQUENZA/mm10.gc50Base.wig.gz'
+            FREEC {
+                FREECLENGTHS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/mm10.fa.fai"
+                FREECCHROMS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/Chromosomes"
+                FREECPILEUP = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/mm10_dbSNP137.ucsc.freec.bed"
+                FREECSNPS= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/mm10_dbSNP137.ucsc.freec.txt.gz"
+            }
+            chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chrX','chrY','chrM']
+        }
+    }
+}
+
+
+
diff --git a/nextflow.config b/nextflow.config
index 987ca4b..1581cff 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -3,60 +3,57 @@ manifest {
     name = "CCBR/LOGAN"
     author = "CCR Collaborative Bioinformatics Resource"
     homePage = "https://github.com/CCBR/LOGAN"
-    description = "one-line description of LOGAN goes here"
+    description = "whoLe genOme-sequencinG Analysis pipeliNe"
     mainScript = "main.nf"
 }
+
     
-includeConfig 'conf/hg38.config'
-includeConfig 'conf/mm10.config'
+includeConfig 'conf/genomes.config'
+includeConfig 'conf/base.config'
+includeConfig 'conf/modules.config'
+includeConfig 'conf/containers.config'
+
+
+params { 
 
-params { // TODO create a separate genome config, with genome index dir that can change depending on platform. see https://github.com/CCBR/CHAMPAGNE/blob/main/conf/genomes.config
-    genome = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta" // file(params.genome) 
-    genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict"
-    wgsregion = "/data/nousomedr/annotation/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" //
-    millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
-    shapeitindel =  "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) //
-    dbsnp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz"
-    dbsnp_indel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_indel.vcf"
-    gnomad = '/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' // /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz
-    //pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon} 
-    pon = "/data/nousomedr/wgs/updatedpon.vcf.gz" 
-    kgp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/1000G_phase1.snps.high_confidence.hg38.vcf.gz"
-    KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2"
     fastq_screen_conf= "${projectDir}/workflow/resources/fastq_screen.conf"
     get_flowcell_lanes="${projectDir}/workflow/scripts/flowcell_lane.py"
-    intervals="${projectDir}/workflow/resources/hg38_v0_wgs_calling_regions.hg38.bed"
     splitbed="${projectDir}/workflow/resources/split_Bed_into_equal_regions.py"
-    split_regions = "24" //Number of regions to split by 
-    snpeff_genome = "GRCh38.86"
-    snpeff_config = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/snpEff/4.3t/snpEff.config"
-    snpeff_bundle = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/snpEff/4.3t/"
-    sites_vcf= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/sites.hg38.vcf.gz"
-    somalier_ancestrydb="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/1kg-somalier"
+    split_regions = "36" //Number of regions to split by 
     script_genderPrediction = "${projectDir}/workflow/scripts/RScripts/predictGender.R"
     script_combineSamples = "${projectDir}/workflow/scripts/RScripts/combineAllSampleCompareResults.R"
     script_ancestry = "${projectDir}/workflow/scripts/RScripts/sampleCompareAncestoryPlots.R"
-    bwagenome= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.fasta"
+    script_sequenza = "${projectDir}/workflow/scripts/RScripts/run_sequenza.R"
+    script_freec = "${projectDir}/workflow/scripts/make_freec_genome.pl"
+    script_freecpaired = "${projectDir}/workflow/scripts/freec_paired.pl"
+    freec_significance = "${projectDir}/workflow/scripts/assess_significance.R"
+    freec_plot = "${projectDir}/workflow/scripts/makeGraph.R"
+    lofreq_convert = "${projectDir}/workflow/scripts/add_gt_lofreq.sh"
     vep_cache = "/fdb/VEP/102/cache"
 
-    //Biowulf
-    config_profile_description = 'Biowulf nf-core config'
-    config_profile_contact = 'staff@hpc.nih.gov'
-    max_memory = 224.GB	
-    max_cpus = 32
-    output = "output"
-
     //SUB WORKFLOWS to SPLIT
     PIPE_ALIGN=null
-    PIPE_GERMLINE=null
+    PIPE_GL=null
     PIPE_VC=null
     PIPE_SV=null
+    PIPE_CNV=null
     PIPE_QC=null
+    PIPE_QC_NOGL=null
+    PIPE_QC_GL=null
     PIPE_BAMVC=null
+    PIPE_BAMCNV=null
+    PIPE_BAMSV=null
+
     PIPE_TONLY_ALIGN=null
     PIPE_TONLY_VC=null
+    PIPE_TONLY_SV=null
+    PIPE_TONLY_CNV=null
+
     PIPE_BAMVC_TONLY=null
     PIPE_TONLY_BAMVC=null
+    PIPE_TONLY_BAMSV=null
+    PIPE_TONLY_BAMCNV=null
+
     PIPE_TONLY_QC=null
 
     //Set all Inputs to null
@@ -66,9 +63,9 @@ params { // TODO create a separate genome config, with genome index dir that can
     file_input=null
 }
 
-includeConfig 'conf/base.config'
 
 profiles {
+    debug { process.beforeScript = 'echo $HOSTNAME' }
 
     docker {
         docker.enabled = true
@@ -78,6 +75,13 @@ profiles {
         // once this is established and works well, nextflow might implement this behavior as new default.
         docker.runOptions = '-u \$(id -u):\$(id -g)'
     }
+    singularity {
+        enabled = true
+        autoMounts = true
+        cacheDir = "$PWD/singularity"
+        envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID'
+        runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/nousomedr/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb'
+        }
     biowulf { 
         includeConfig 'conf/biowulf.config' 
     }
@@ -93,286 +97,24 @@ profiles {
     ci_stub { 
         includeConfig 'conf/ci_stub.config' 
     }
-
-    local { // TODO move all containers to conf/containers.config
-        process {
-            executor = 'local'
-            withName:fc_lane {
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0' 
-            }
-            withName:fastq_screen {
-                container= 'docker://nciccbr/ccbr_fastq_screen_0.13.0:v2.0' 
-            }
-            withName:kraken {
-                container= 'docker://nciccbr/ccbr_kraken_v2.1.1:v0.0.1'
-            }
-            withName:fastqc {
-                container= 'docker://nciccbr/ccbr_fastqc_0.11.9:v1.1'
-            }
-            withName: qualimap_bamqc {
-                container= 'docker://nciccbr/ccbr_qualimap:v0.0.1'
-            }
-            withName: 'samtools_flagstats|vcftools|bcftools_stats|gatk_varianteval|snpeff|somalier_extract|somalier_analysis' {
-                container=  'docker://dnousome/ccbr_logan_base:v0.3.0'
-            }
-            withName: 'multiqc' {
-                container=  'docker://nciccbr/ccbr_multiqc_1.9:v0.0.1'
-            }
-            withName: 'collectvariantcallmetrics' {
-                container=  'docker://nciccbr/ccbr_picard:v0.0.1'
-            }
-            withName: 'fastp|bwamem2|indelrealign|bqsr|gatherbqsr|samtoolsindex|applybqsr' {
-                container=  'docker://dnousome/ccbr_logan_base:v0.3.0'
-            }
-            withName:'mutect2|mutect2_t|mutect2_t_tonly|mutect2filter|mutect2filter_tonly|learnreadorientationmodel|learnreadorientationmodel_tonly|contamination_paired|contamination_tumoronly|pileup_paired_t|pileup_paired_n|pileup_paired_tonly' {
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-            }
-            withName: 'strelka_tn|vardict_tn|vardict_tonly|varscan_tn|varscan_tonly|combineVariants|combineVariants_strelka' {
-                 container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-            }
-            withName: 'annotvep_tn|annotvep_tonly' {
-                 container= 'docker://dnousome/ccbr_vcf2maf:v102.0.0'
-            }
-            withName: 'svaba_somatic' {
-                 container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-            }
-                                
-        }    
-        singularity {
-                enabled = true
-                autoMounts = true
-                cacheDir = "$PWD/singularity"
-                envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID'
-                // TODO refactor to no longer need bind mounts. These paths also only work on biowulf
-                runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/nousomedr/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb'
-
-            }    
-    }
-
-    localstub {
-         process {
-            executor = 'local'
-
-          singularity {
-                enabled = false
-          }
-         }
-    }
-
-    biowulf_DEPRECATED { // TODO switch all cpus/time/memory definitions to use labels in conf/base.config
-        process {
-            executor = 'slurm'
-            queue = 'norm'
-            queueSize = 200
-            errorStrategy = 'finish'
-            maxRetries = 0
-            pollInterval = '2 min'
-            queueStatInterval = '5 min'
-            submitRateLimit = '6/1min'
-            
-            //Default options for Slurm Nodes
-            cpus= '4'
-            time= 48.h
-            memory=64.GB
-
-            timeline.enabled = true
-            report.enabled = true
-            //scratch = '/lscratch/$SLURM_JOBID'
-
-            
-            //Each Process and Container if it exists or use module in each process
-            withName:fastq_screen{
-                container= 'docker://nciccbr/ccbr_fastq_screen_0.13.0:v2.0' 
-                memory=32.GB
-                time=48.h
-                cpus=4
-            }
-            withName:fastqc {
-                container= 'docker://nciccbr/ccbr_fastqc_0.11.9:v1.1' 
-                memory=24.GB
-                time=24.h
-                cpus=8
-            }
-            withName:fastp {
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-                memory=24.GB
-                time=24.h
-                cpus=4
-            }
-            withName:fc_lane {
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-            }
-            withName:bwamem2 {
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-                memory=200.GB
-                time=48.h
-                cpus=17
-            }
-            withName:indelrealign{
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-                memory=48.GB
-                time=72.h
-                cpus=16
-            }
-            withName:bqsr{
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-                memory= 32.GB
-                time= 48.h
-            }
-            withName:gatherbqsr{
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-                memory= 16.GB
-                time= 2.h
-            }
-            withName:applybqsr{
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-                memory= 48.GB
-                time= 48.h
-            }
-             withName:samtoolsindex{
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-                memory= 16.GB
-                time= 12.h
-                cpus= 4
-            }
-            withName: 'mutect2|mutect2_t|mutect2_t_tonly' {
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-                memory= 48.GB
-                cpus= 4
-                time= 72.h
-            }
-            withName: 'vardict_tn|vardict_tonly|varscan_tn|varscan_tonly|combineVariants|combineVariants_strelka' {
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-                memory= 32.GB
-                cpus= 2
-                time= 72.h
-            }
-            withName: 'strelka_tn' {
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-                memory= 48.GB
-                cpus= 16
-                time= 72.h
-            }
-            withName:'mutect2filter|mutect2filter_tonly' {
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-                memory= 24.GB
-                time= 24.h
-                cpus= 4
-            }
-            withName:'contamination_paired|contamination_tumoronly'{
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-                memory= 24.GB
-                time= 24.h
-            }
-            withName:'learnreadorientationmodel|learnreadorientationmodel_tonly' {
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-                memory= 48.GB
-                time= 24.h
-                }
-            withName:'mergemut2stats|mergemut2stats_tonly' {
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-                memory= 16.GB
-                time= 24.h
-                }
-            withName:'pileup_paired_t|pileup_paired_n|pileup_paired_tonly' {
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-                memory= 16.GB
-                time= 12.h
-                }
-            withName:'annotvep_tn|annotvep_tonly'{
-                container= 'docker://dnousome/ccbr_vcf2maf:v102.0.0'
-                memory= 32.GB
-                time= 24.h
-                cpus=16
-            }
-            withName:kraken {
-                container= 'docker://nciccbr/ccbr_kraken_v2.1.1:v0.0.1'
-                memory= 64.GB
-                time= 24.h
-                cpus=16
-                clusterOptions="--gres=lscratch:256"
-                }
-            withName:'deepvariant_step1' {
-                memory= 64.GB
-                time= 24.h
-                cpus=2
-            }
-            withName:'deepvariant_step3' {
-                memory= 64.GB
-                time= 24.h
-                cpus=2
-                clusterOptions="--gres=lscratch:256"
-                }
-            withName:'deepvariant_step2|deepvariant_combined' {
-                memory= 70.GB
-                time= 24.h
-                cpus= 17
-                queue = 'gpu'
-                clusterOptions="--partition=gpu --gres=gpu:v100x:1,lscratch:256"
-                }
-            withName:'somalier_extract|somalier_analysis' {
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-                memory= 16.GB
-                time= 12.h
-                }
-            withName:'gatk_varianteval' {
-                container=  'docker://dnousome/ccbr_logan_base:v0.3.0'
-                memory= 16.GB
-                time= 12.h
-                }
-            withName:'qualimap_bamqc' {
-                container= 'docker://nciccbr/ccbr_qualimap:v0.0.1'
-                cpus= 8
-                memory= 120.GB
-                time= 48.h
-                //errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : task.exitStatus in [143, 255] ? 'ignore' : 'finish' }
-                }
-            withName:'cobalt|amber|purple' {
-                memory= 64.GB
-                time= 12.h
-                cpus=16
-                }
-            withName:'svaba_somatic' {
-                container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
-                memory= 64.GB
-                time= 24.h
-                cpus=16
-                }
-
-        }
-    }
 }
-
-includeConfig 'conf/genomes.config'
-includeConfig 'conf/containers.config'
-
+  
 // Export these variables to prevent local Python/R libraries from conflicting with those in the container
 // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
 // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
-env {
-    PYTHONNOUSERSITE = 1
-    R_PROFILE_USER   = "/.Rprofile"
-    R_ENVIRON_USER   = "/.Renviron"
-    JULIA_DEPOT_PATH = "/usr/local/share/julia"
-}
+    env {
+        PYTHONNOUSERSITE = 1
+        R_PROFILE_USER   = "/.Rprofile"
+        R_ENVIRON_USER   = "/.Renviron"
+        JULIA_DEPOT_PATH = "/usr/local/share/julia"
+    }
 
 // Capture exit codes from upstream processes when piping
 process.shell = ['/bin/bash', '-euo', 'pipefail']
 
-        //Container options
-        singularity {
-                enabled = true
-                autoMounts = true
-                cacheDir = "$PWD/singularity"
-                envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID'
-                runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/nousomedr/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb'
-            }
+    
 
-    }
-}
 
-includeConfig 'conf/genomes.config'
-includeConfig 'conf/containers.config'
 
 // Export these variables to prevent local Python/R libraries from conflicting with those in the container
 // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
@@ -384,6 +126,7 @@ env {
     JULIA_DEPOT_PATH = "/usr/local/share/julia"
 }
 
+
 // Capture exit codes from upstream processes when piping
 process.shell = ['/bin/bash', '-euo', 'pipefail']
 
@@ -399,17 +142,6 @@ report {
     file      = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html"
 }
 
-includeConfig 'conf/modules.config'
-
-
-manifest {
-    name = "CCBR/LOGAN"
-    author = "CCR Collaborative Bioinformatics Resource"
-    homePage = "https://github.com/CCBR/LOGAN"
-    description = "whoLe genOme-sequencinG Analysis pipeliNe"
-    mainScript = "main.nf"
-}
-
 // Function to ensure that resource requirements don't go beyond
 // a maximum limit
 def check_max(obj, type) {

From 6508da62a6a28217ea978ec02708ebcafe949025 Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Mon, 11 Dec 2023 14:56:28 -0500
Subject: [PATCH 04/58] fix: renamed genome variable

---
 modules/local/germline.nf              |  27 +--
 modules/local/qc.nf                    | 173 +++++++++++++------
 modules/local/splitbed.nf              |   9 +
 modules/local/trim_align.nf            | 144 ++++++++--------
 modules/local/variant_calling_tonly.nf | 229 ++++++++++++++++++-------
 5 files changed, 383 insertions(+), 199 deletions(-)

diff --git a/modules/local/germline.nf b/modules/local/germline.nf
index b004b80..d3544a5 100644
--- a/modules/local/germline.nf
+++ b/modules/local/germline.nf
@@ -1,10 +1,5 @@
-//References
-GENOME=file(params.genome)
+GENOMEREF=file(params.genomes[params.genome].genome)
 MODEL="/opt/models/wgs/model.ckpt"
-intervalbedin = file(params.intervals)
-
-
-
 
 //Output Directory
 outdir=file(params.output)
@@ -14,8 +9,6 @@ outdir=file(params.output)
 process deepvariant_step1 {
     module=['deepvariant/1.4.0']
     
-    //publishDir("${outdir}/deepvariant", mode: 'copy')
-
     input:
         tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path(bed)
     
@@ -29,7 +22,7 @@ process deepvariant_step1 {
     mkdir -p gvcf
     make_examples \
     --mode calling \
-    --ref $GENOME \
+    --ref $GENOMEREF \
     --regions ${bed} \
     --reads ${samplename}.bam \
     --channels insert_size \
@@ -52,7 +45,6 @@ process deepvariant_step2 {
     
     module=['deepvariant/1.4.0']
     
-    //publishDir("${outdir}/deepvariant", mode: 'copy')
     input:
         tuple val(samplename), path(tfrecords), path(tfgvcf)
     
@@ -80,7 +72,6 @@ process deepvariant_step2 {
 
 //Step 3 DV
 process deepvariant_step3 {
-    scratch '/lscratch/$SLURM_JOB_ID/dv'
     publishDir("${outdir}/deepvariant", mode: 'copy')
 
     module=['deepvariant/1.4.0']
@@ -97,7 +88,7 @@ process deepvariant_step3 {
     script: 
     """
    postprocess_variants \
-    --ref $GENOME \
+    --ref $GENOMEREF \
     --infile ${samplename}_call_variants_output.tfrecord.gz \
     --outfile ${samplename}.vcf.gz \
     --gvcf_outfile ${samplename}.gvcf.gz \
@@ -107,7 +98,7 @@ process deepvariant_step3 {
     stub:
     """
     touch ${samplename}.vcf.gz ${samplename}.vcf.gz.tbi
-    touch ${samplename}.gvcf.gz   ${samplename}.gvcf.gz.tbi
+    touch ${samplename}.gvcf.gz ${samplename}.gvcf.gz.tbi
 
     """
 
@@ -116,7 +107,6 @@ process deepvariant_step3 {
 //Combined DeepVariant
 process deepvariant_combined {
     module=['deepvariant/1.4.0']
-    scratch '/lscratch/$SLURM_JOB_ID/dv'
 
     publishDir("${outdir}/deepvariant", mode: 'copy')
 
@@ -132,7 +122,7 @@ process deepvariant_combined {
     """
     run_deepvariant \
         --model_type=WGS \
-        --ref=$GENOME \
+        --ref=$GENOMEREF \
         --reads=${samplename}.bam \
         --output_gvcf= ${samplename}.gvcf.gz \
         --output_vcf=${samplename}.vcf.gz \
@@ -151,11 +141,10 @@ process deepvariant_combined {
 }
 
 process glnexus {
-    //scratch '/lscratch/$SLURM_JOB_ID/dv'
-    publishDir("${outdir}/deepvariant", mode: 'copy')
 
     module=['glnexus','bcftools']
-    
+
+    publishDir("${outdir}/deepvariant", mode: 'copy')    
     input:
         path(gvcfs)
     
@@ -173,7 +162,7 @@ process glnexus {
         -m - \
         -Oz \
         --threads 8 \
-        -f $GENOME \
+        -f $GENOMEREF \
         -o germline.norm.vcf.gz \
         germline.v.bcf
 
diff --git a/modules/local/qc.nf b/modules/local/qc.nf
index 47c98f5..65515e8 100644
--- a/modules/local/qc.nf
+++ b/modules/local/qc.nf
@@ -1,22 +1,15 @@
 ///References to assign
-GENOME=file(params.genome)
-GENOMEDICT=file(params.genomedict)
-WGSREGION=file(params.wgsregion) 
-MILLSINDEL=file(params.millsindel) //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz
-SHAPEITINDEL=file(params.shapeitindel) //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz
-KGP=file(params.kgp) //1000G_phase1.snps.high_confidence.hg38.vcf.gz"
-DBSNP=file(params.dbsnp) //dbsnp_138.hg38.vcf.gz"
-GNOMAD=file(params.gnomad) //somatic-hg38-af-only-gnomad.hg38.vcf.gz
-PON=file(params.pon) 
+GENOMEREF=file(params.genomes[params.genome].genome)
+DBSNP=file(params.genomes[params.genome].dbsnp) //dbsnp_138.hg38.vcf.gz"
 FASTQ_SCREEN_CONF=file(params.fastq_screen_conf)
-BACDB=file(params.KRAKENBACDB)
-SNPEFF_GENOME = params.snpeff_genome
-SNPEFF_CONFIG = file(params.snpeff_config)
-SNPEFF_BUNDLE = file(params.snpeff_bundle)
+BACDB=file(params.genomes[params.genome].KRAKENBACDB)
+SNPEFF_GENOME = params.genomes[params.genome].snpeff_genome
+SNPEFF_CONFIG = file(params.genomes[params.genome].snpeff_config)
+SNPEFF_BUNDLE = file(params.genomes[params.genome].snpeff_bundle)
 
 //SOMALIER
-SITES_VCF= file(params.sites_vcf)
-ANCESTRY_DB=file(params.somalier_ancestrydb)
+SITES_VCF= file(params.genomes[params.genome].sites_vcf)
+ANCESTRY_DB=file(params.genomes[params.genome].somalier_ancestrydb)
 SCRIPT_PATH_GENDER = file(params.script_genderPrediction)
 SCRIPT_PATH_SAMPLES = file(params.script_combineSamples)
 SCRIPT_PATH_PCA = file(params.script_ancestry)
@@ -26,7 +19,7 @@ SCRIPT_PATH_PCA = file(params.script_ancestry)
 outdir=file(params.output)
 
 process fc_lane {
-
+    label 'process_low'
     publishDir("${outdir}/QC/fc_lane/", mode:'copy')
 
     input:
@@ -57,7 +50,6 @@ process fastq_screen {
 
     publishDir(path: "${outdir}/QC/fastq_screen/", mode:'copy')
 
-    //module=['fastq_screen/0.15.2','bowtie/2-2.5.1']
     input:
     tuple val(samplename),
         path("${samplename}.R1.trimmed.fastq.gz"),
@@ -107,11 +99,6 @@ process kraken {
         Kraken logfile and interative krona report
     */
     publishDir(path: "${outdir}/QC/kraken/", mode: 'copy')
-
-    //module=['kraken/2.1.2', 'kronatools/2.8']
-    scratch '/lscratch/$SLURM_JOB_ID'
-    //scratch '/data/CCBR/rawdata/nousome/small_truth_set' //CHANGE AFTER to LSCRATCH
-
     
     input:
         tuple val(samplename), 
@@ -208,7 +195,7 @@ process qualimap_bamqc {
     //module: config['images']['qualimap']
     
     input:
-        tuple val(samplename), path("${samplename}.bqsr.bam"), path("${samplename}.bqsr.bai")
+        tuple val(samplename), path(bam), path(bai)
 
     output: 
         tuple path("${samplename}_genome_results.txt"), path("${samplename}_qualimapReport.html")
@@ -216,7 +203,7 @@ process qualimap_bamqc {
     script: 
     """
     unset DISPLAY
-    qualimap bamqc -bam ${samplename}.bqsr.bam \
+    qualimap bamqc -bam ${bam} \
         --java-mem-size=112G \
         -c -ip \
         -outdir ${samplename} \
@@ -247,18 +234,19 @@ process samtools_flagstats {
     @Output:
         Text file containing alignment statistics
     */
-    publishDir("${outdir}/QC/flagstats/", mode: "copy")
-    //module=['samtools/1.16.1']
+    label 'process_mid'
 
+    publishDir("${outdir}/QC/flagstats/", mode: "copy")
+    
     input:
-        tuple val(samplename), path("${samplename}.bqsr.bam"), path("${samplename}.bqsr.bai")
+        tuple val(samplename), path(bam), path(bai)
     
     output:
         path("${samplename}.samtools_flagstat.txt")
 
     script: 
     """
-    samtools flagstat ${samplename}.bqsr.bam > ${samplename}.samtools_flagstat.txt
+    samtools flagstat ${bam} > ${samplename}.samtools_flagstat.txt
     """
 
     stub:
@@ -267,6 +255,48 @@ process samtools_flagstats {
     """
 }
 
+
+process mosdepth {
+    /*
+    Quality-control step to assess depth: runs mosdepth on one BAM with --by 500, -n (no per-base output) and --fast-mode
+    @Input:
+        Sample name with a BAM file and its index (presumably the recalibrated BAM; confirm against the caller)
+    @Output:
+        `{samplename}.mosdepth.region.dist.txt`
+        `{samplename}.mosdepth.summary.txt`
+        `{samplename}.regions.bed.gz`
+        `{samplename}.regions.bed.gz.csi`
+        Per-base output is suppressed by -n; --quantize and --thresholds are not requested.
+        mosdepth may also write `{samplename}.mosdepth.global.dist.txt`, which is not declared as an output here.
+        NOTE(review): the output entries below are comma-separated without `tuple`; confirm this is intended.
+    */
+
+    publishDir("${outdir}/QC/mosdepth/", mode: "copy")
+
+    input:
+        tuple val(samplename), path(bam), path(bai)
+    
+    output:
+        path("${samplename}.mosdepth.region.dist.txt"),
+        path("${samplename}.mosdepth.summary.txt"),
+        path("${samplename}.regions.bed.gz"),
+        path("${samplename}.regions.bed.gz.csi")
+
+
+    script: 
+    """
+    mosdepth -n --fast-mode --by 500  ${samplename} ${bam} -t $task.cpus
+    """
+
+    stub:
+    """
+    touch "${samplename}.mosdepth.region.dist.txt"
+    touch "${samplename}.mosdepth.summary.txt"
+    touch "${samplename}.regions.bed.gz"
+    touch "${samplename}.regions.bed.gz.csi"
+    """
+}
+
 process vcftools {    
     /*
     Quality-control step to calculates a measure of heterozygosity on 
@@ -279,8 +309,9 @@ process vcftools {
     @Output:
         Text file containing a measure of heterozygosity
     */
+    label 'process_mid'
+
     publishDir(path:"${outdir}/QC/vcftools", mode: 'copy')
-    //module=['vcftools/0.1.16']
     
     input: 
         tuple path(germlinevcf),path(germlinetbi)
@@ -311,9 +342,7 @@ process collectvariantcallmetrics {
         Text file containing a collection of metrics relating to snps and indels 
     */
     publishDir("${outdir}/QC/variantmetrics", mode: 'copy')
-    //module=['picard/2.20.8']
-    //container: config['images']['picard']
-
+    
     input: 
         tuple path(germlinevcf),path(germlinetbi)
     
@@ -321,9 +350,6 @@ process collectvariantcallmetrics {
         tuple path("raw_variants.variant_calling_detail_metrics"),
         path("raw_variants.variant_calling_summary_metrics")
 
-    //params: 
-     //   dbsnp=config['references']['DBSNP'],
-      //  prefix = os.path.join(output_qcdir,"raw_variants"),
        
     script:
     """
@@ -356,6 +382,7 @@ process bcftools_stats {
         Text file containing a collection of summary statistics
     */
 
+    label 'process_mid'
     publishDir("${outdir}/QC/bcftoolsstat", mode: 'copy')
 
     input:
@@ -388,8 +415,9 @@ process gatk_varianteval {
     @Output:
         Evaluation table containing a collection of summary statistics
     */
+    label 'process_mid'
+
     publishDir("${outdir}/QC/gatk_varianteval", mode: 'copy')
-    //module=['GATK/4.2.0.0']
 
     input: 
         tuple val(samplename), path("${samplename}.gvcf.gz") ,path("${samplename}.gvcf.gz.tbi")
@@ -406,7 +434,7 @@ process gatk_varianteval {
     script: 
     """
     gatk --java-options '-Xmx12g -XX:ParallelGCThreads=16' VariantEval \
-        -R $GENOME \
+        -R $GENOMEREF \
         -O ${samplename}.germline.eval.grp \
         --dbsnp $DBSNP \
         --eval ${samplename}.gvcf.gz
@@ -431,12 +459,7 @@ process snpeff {
     @Output:
         Evaluation table containing a collection of summary statistics
     */
-
-        //genome = config['references']['SNPEFF_GENOME'],
-        //config = config['references']['SNPEFF_CONFIG'],
-        //bundle = config['references']['SNPEFF_BUNDLE'],
-            //envmodules: 'snpEff/4.3t'
-            //container: config['images']['wes_base']
+    label 'process_mid'
     publishDir("${outdir}/QC/snpeff", mode: 'copy')
 
     input:  
@@ -473,6 +496,7 @@ process somalier_extract {
     @Output:
         Exracted sites in (binary) somalier format
     */
+    label 'process_low'
     publishDir("${outdir}/QC/somalier", mode: 'copy')
 
     input:
@@ -490,7 +514,7 @@ process somalier_extract {
     somalier extract \
         -d output \
         --sites $SITES_VCF \
-        -f $GENOME \
+        -f $GENOMEREF \
         ${samplename}.bam
     """
 
@@ -501,7 +525,7 @@ process somalier_extract {
     """
 }
 
-process somalier_analysis {
+process somalier_analysis_human {
     /*
     To estimate relatedness, Somalier uses extracted site information to
     compare across all samples. This step also runs the ancestry estimation
@@ -511,13 +535,9 @@ process somalier_analysis {
     @Output:
         Separate tab-separated value (TSV) files with relatedness and ancestry outputs
 
-    ancestry_db = config['references']['SOMALIER']['ANCESTRY_DB'],
-    sites_vcf = config['references']['SOMALIER']['SITES_VCF'],
-    genomeFasta = config['references']['GENOME'],
-    script_path_gender = config['scripts']['genderPrediction'],
-    script_path_samples = config['scripts']['combineSamples'],
-    script_path_pca = config['scripts']['ancestry'],
     */
+    label 'process_low'
+
     publishDir("${outdir}/QC/somalier", mode: 'copy')
 
     input:
@@ -570,6 +590,57 @@ process somalier_analysis {
     """
 }
 
+process somalier_analysis_mouse {
+    /*
+    To estimate relatedness, Somalier uses extracted site information to
+    compare across all samples (`somalier relate`). This variant runs no ancestry
+    step; it only post-processes the relate output with the gender and sample-pair R scripts.
+    @Input:
+        Extracted sites in (binary) somalier format for ALL samples in the cohort
+    @Output:
+        Separate tab-separated value (TSV) files: relatedness pairs/samples and predicted genders/pairs
+
+    */
+    label 'process_low'
+
+    publishDir("${outdir}/QC/somalier", mode: 'copy')
+
+    input:
+        path(somalierin)
+    
+    output:
+        tuple path("relatedness.pairs.tsv"), 
+        path("relatedness.samples.tsv"),
+        path("predicted.genders.tsv"),
+        path("predicted.pairs.tsv")
+    
+    script:
+    """ 
+    echo "Estimating relatedness"
+    somalier relate \
+        -o "relatedness" \
+        $somalierin
+    
+    Rscript $SCRIPT_PATH_GENDER \
+        relatedness.samples.tsv \
+        predicted.genders.tsv    
+    
+    Rscript $SCRIPT_PATH_SAMPLES \
+        relatedness.pairs.tsv \
+        predicted.pairs.tsv
+    
+    """
+    
+    stub:
+
+    """
+    touch relatedness.pairs.tsv
+    touch relatedness.samples.tsv
+    touch predicted.genders.tsv
+    touch predicted.pairs.tsv
+    
+    """
+}
 
 process multiqc {
 
diff --git a/modules/local/splitbed.nf b/modules/local/splitbed.nf
index 09ffb9b..0ae2416 100644
--- a/modules/local/splitbed.nf
+++ b/modules/local/splitbed.nf
@@ -22,3 +22,12 @@ process splitinterval {
     python $SPLIT_BED -infile ${BED_IN} -num ${SPLIT_REGIONS} -out 'bedout/bed'
     """
 }
+
+/*
+Commands used to convert the genome index (.fai) to a BED file and then to a GATK interval list
+awk -F '\t' '{printf("%s\t0\t%s\n",$1,$2);}' genome.fa.fai > GRCh38.primary_assembly.genome.bed
+bedtools subtract -a GRCh38.primary_assembly.genome.bed -b ../hg38.blacklist.bed > GRCh38.primary_assembly.genome.interval.bed
+
+gatk BedToIntervalList -I GRCh38.primary_assembly.genome.interval.bed -O \
+GRCh38.primary_assembly.genome.interval_list -SD GRCh38.primary_assembly.genome.dict
+*/
diff --git a/modules/local/trim_align.nf b/modules/local/trim_align.nf
index 766595a..fefe243 100644
--- a/modules/local/trim_align.nf
+++ b/modules/local/trim_align.nf
@@ -1,17 +1,12 @@
-GENOME=file(params.genome)
-GENOMEDICT=file(params.genomedict)
-WGSREGION=file(params.wgsregion) 
-MILLSINDEL=file(params.millsindel) //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz
-SHAPEITINDEL=file(params.shapeitindel) //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz
-KGP=file(params.kgp) //1000G_phase1.snps.high_confidence.hg38.vcf.gz"
-DBSNP=file(params.dbsnp) //dbsnp_138.hg38.vcf.gz"
-GNOMAD=file(params.gnomad) //somatic-hg38-af-only-gnomad.hg38.vcf.gz
-PON=file(params.pon) 
+GENOMEREF=file(params.genomes[params.genome].genome)
+KNOWNRECAL = params.genomes[params.genome].KNOWNRECAL
 outdir=file(params.output)
 
 
-process fastp{
+process fastp {
+    label 'process_mid'
     tag { name }
+    publishDir(path: "${outdir}/QC/fastp", mode: 'copy', pattern: '{*fastp.json,*fastp.html}') 
 
     input:
     tuple val(samplename), path(fqs)
@@ -25,7 +20,7 @@ process fastp{
 
     script:
     """
-    fastp -w 4 \
+    fastp -w $task.cpus \
         --detect_adapter_for_pe \
         --in1 ${fqs[0]} \
         --in2 ${fqs[1]} \
@@ -48,6 +43,7 @@ process fastp{
 
 process bwamem2 {
     tag { name }
+    
     input:
         tuple val(samplename), 
         path("${samplename}.R1.trimmed.fastq.gz"),
@@ -59,18 +55,17 @@ process bwamem2 {
         tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai")
 
     script:
-    //BWAmem2/samblaster/samtools sort for marking duplicates;
     """
 
      bwa-mem2 mem -M \
         -R '@RG\\tID:${samplename}\\tSM:${samplename}\\tPL:illumina\\tLB:${samplename}\\tPU:${samplename}\\tCN:hgsc\\tDS:wgs' \
-        -t 16 \
-        ${GENOME} \
+        -t $task.cpus \
+        ${GENOMEREF} \
         ${samplename}.R1.trimmed.fastq.gz ${samplename}.R2.trimmed.fastq.gz | \
     samblaster -M | \
-    samtools sort -@12 -m 4G - -o ${samplename}.bam
+    samtools sort -@ $task.cpus -m 4G - -o ${samplename}.bam
 
-    samtools index -@ 8 ${samplename}.bam ${samplename}.bai
+    samtools index -@ $task.cpus ${samplename}.bam ${samplename}.bai
 
     """
 
@@ -80,66 +75,26 @@ process bwamem2 {
     """
 }
 
-process indelrealign {
-    /*
-    Briefly, RealignerTargetCreator runs faster with increasing -nt threads, 
-    while IndelRealigner shows diminishing returns for increasing scatter
-    */
-    tag { name }
-    
-    input:
-    tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai")
-
-    output:
-    tuple val(samplename), path("${samplename}.ir.bam")
-
-    script: 
-    
-    """
-    /usr/bin/java -Xmx32g -jar \${GATK_JAR} -T RealignerTargetCreator \
-        -I ${samplename}.bam \
-        -R ${GENOME} \
-        -o ${samplename}.intervals \
-        -nt 16 \
-        -known ${MILLSINDEL} -known ${SHAPEITINDEL} 
-    
-    /usr/bin/java -Xmx32g -jar \${GATK_JAR} -T IndelRealigner \
-        -R ${GENOME} \
-        -I ${samplename}.bam \
-        -known ${MILLSINDEL} -known ${SHAPEITINDEL} \
-        --use_jdk_inflater \
-        --use_jdk_deflater \
-        -targetIntervals ${samplename}.intervals \
-        -o  ${samplename}.ir.bam
-    """
-    
-
-    stub:
-    """
-    touch ${samplename}.ir.bam 
-    """
-
-}
-
 
 
 process bqsr {
     /*
     Base quality recalibration for all samples 
     */    
+
+    label 'process_low'
     input:
         tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path(bed)
 
     output:
-        tuple val(samplename),path("${samplename}_${bed.simpleName}.recal_data.grp"),emit: bqsrby
-        //path("${bam.simpleName}_${bed.simpleName}.recal_data.grp"), emit: bqsrby
+        tuple val(samplename),path("${samplename}_${bed.simpleName}.recal_data.grp"), emit: bqsrby
 
     script:
     """
-    gatk --java-options '-Xmx32g' BaseRecalibrator \
+    gatk --java-options '-Xmx16g' BaseRecalibrator \
     --input ${samplename}.bam \
-    --reference ${GENOME} \
-    --known-sites ${MILLSINDEL} --known-sites ${SHAPEITINDEL} \
+    --reference ${GENOMEREF} \
+    ${KNOWNRECAL} \
     --output ${samplename}_${bed.simpleName}.recal_data.grp \
     --intervals ${bed}
     """
@@ -152,7 +107,7 @@ process bqsr {
 }
 
 process gatherbqsr {
-
+    label 'process_low'
     input: 
         tuple val(samplename), path(recalgroups)
     output:
@@ -169,6 +124,7 @@ process gatherbqsr {
     """
 
     stub:
+
     """
     touch ${samplename}.recal_data.grp
     """
@@ -179,6 +135,7 @@ process applybqsr {
     /*
     Base quality recalibration for all samples to 
     */   
+    label 'process_low'
     publishDir(path: "${outdir}/bams/BQSR", mode: 'copy') 
 
     input:
@@ -188,19 +145,19 @@ process applybqsr {
         tuple val(samplename), path("${samplename}.bqsr.bam"),  path("${samplename}.bqsr.bai")
 
     script:
-    """
 
+    """
     gatk --java-options '-Xmx32g' ApplyBQSR \
-        --reference ${GENOME} \
+        --reference ${GENOMEREF} \
         --input ${samplename}.bam \
         --bqsr-recal-file ${samplename}.recal_data.grp \
         --output ${samplename}.bqsr.bam \
         --use-jdk-inflater \
         --use-jdk-deflater
-
     """
 
     stub:
+    
     """
     touch ${samplename}.bqsr.bam ${samplename}.bqsr.bai
     """
@@ -210,6 +167,7 @@ process applybqsr {
 
 
 process samtoolsindex {
+    label 'process_mid'
     publishDir(path: "${outdir}/bams/BQSR", mode: 'copy') 
     
     input:
@@ -220,7 +178,7 @@ process samtoolsindex {
 
     script:
     """
-    samtools index -@ 4 ${bam} ${bam}.bai
+    samtools index -@ $task.cpus ${bam} ${bam}.bai
     """
 
     stub:
@@ -230,8 +188,9 @@ process samtoolsindex {
 
 }
 
-//Save to CRAM for output and publish
-process bamtocram_tonly{
+//Save to CRAM for output
+process bamtocram_tonly {
+    label 'process_mid'
     
     input: 
         tuple val(tumorname), path(tumor), path(tumorbai)
@@ -241,6 +200,49 @@ process bamtocram_tonly{
 
     script:
     """
-        samtools view -@ 4 -C -T $GENOME -o ${sample}.cram {$tumor}.bam
+        samtools view -@ $task.cpus -C -T $GENOMEREF -o ${sample}.cram {$tumor}.bam
     """
-}
\ No newline at end of file
+}
+
+
+/*
+process indelrealign {
+    //Briefly, RealignerTargetCreator runs faster with increasing -nt threads, 
+    //while IndelRealigner shows diminishing returns for increasing scatter
+    
+    tag { name }
+    
+    input:
+    tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai")
+
+    output:
+    tuple val(samplename), path("${samplename}.ir.bam")
+
+    script: 
+    
+    """
+    /usr/bin/java -Xmx32g -jar \${GATK_JAR} -T RealignerTargetCreator \
+        -I ${samplename}.bam \
+        -R ${GENOMEREF} \
+        -o ${samplename}.intervals \
+        -nt 16 \
+        -known ${MILLSINDEL} -known ${SHAPEITINDEL} 
+    
+    /usr/bin/java -Xmx32g -jar \${GATK_JAR} -T IndelRealigner \
+        -R ${GENOMEREF} \
+        -I ${samplename}.bam \
+        -known ${MILLSINDEL} -known ${SHAPEITINDEL} \
+        --use_jdk_inflater \
+        --use_jdk_deflater \
+        -targetIntervals ${samplename}.intervals \
+        -o  ${samplename}.ir.bam
+    """
+    
+
+    stub:
+    """
+    touch ${samplename}.ir.bam 
+    """
+
+}
+*/
diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf
index 3b73da7..3d67e26 100644
--- a/modules/local/variant_calling_tonly.nf
+++ b/modules/local/variant_calling_tonly.nf
@@ -1,13 +1,15 @@
-GENOME=file(params.genome)
-GENOMEDICT=file(params.genomedict)
-WGSREGION=file(params.wgsregion) 
-MILLSINDEL=file(params.millsindel) //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz
-SHAPEITINDEL=file(params.shapeitindel) //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz
-KGP=file(params.kgp) //1000G_phase1.snps.high_confidence.hg38.vcf.gz"
-DBSNP=file(params.dbsnp) //dbsnp_138.hg38.vcf.gz"
-GNOMAD=file(params.gnomad) //somatic-hg38-af-only-gnomad.hg38.vcf.gz
-PON=file(params.pon) 
-VEP_CACHEDIR=file(params.vep_cache)
+GENOMEREF=file(params.genomes[params.genome].genome)
+GENOMEFAI=file(params.genomes[params.genome].genomefai)
+GENOMEDICT=file(params.genomes[params.genome].genomedict)
+KGPGERMLINE=params.genomes[params.genome].kgp //1000G_phase1.snps.high_confidence.hg38.vcf.gz"
+DBSNP=file(params.genomes[params.genome].dbsnp) //dbsnp_138.hg38.vcf.gz"
+GNOMADGERMLINE=params.genomes[params.genome].gnomad //somatic-hg38-af-only-gnomad.hg38.vcf.gz
+PON=file(params.genomes[params.genome].pon) 
+VEPCACHEDIR=file(params.genomes[params.genome].vepcache)
+VEPSPECIES=params.genomes[params.genome].vepspecies
+VEPBUILD=params.genomes[params.genome].vepbuild
+SOMATIC_FOREST=params.genomes[params.genome].octopus_sforest
+GERMLINE_FOREST=params.genomes[params.genome].octopus_gforest
 
 //Output
 outdir=file(params.output)
@@ -15,6 +17,8 @@ outdir=file(params.output)
 
 
 process pileup_paired_tonly {
+    label 'process_highmem'
+
     input:
         tuple val(tumorname), path(tumor), path(tumorbai), path(bed)
     
@@ -27,7 +31,7 @@ process pileup_paired_tonly {
     """
     gatk --java-options -Xmx48g GetPileupSummaries \
         -I ${tumor} \
-        -V ${KGP} \
+        -V $KGPGERMLINE \
         -L ${bed} \
         -O ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table 
 
@@ -43,6 +47,7 @@ process pileup_paired_tonly {
 
 
 process contamination_tumoronly {
+    label 'process_highmem'
     publishDir(path: "${outdir}/vcfs/mutect2/", mode: 'copy')
 
     input:
@@ -61,7 +66,7 @@ process contamination_tumoronly {
 
     """
     gatk GatherPileupSummaries \
-    --sequence-dictionary ${GENOMEDICT} \
+    --sequence-dictionary $GENOMEDICT \
     -I ${alltumor} -O ${tumorname}_allpileups.table
     
     gatk CalculateContamination \
@@ -81,6 +86,7 @@ process contamination_tumoronly {
 
 
 process learnreadorientationmodel_tonly {
+    label 'process_highmem'
     publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy')
 
     input:
@@ -109,6 +115,7 @@ process learnreadorientationmodel_tonly {
 
 
 process mergemut2stats_tonly {
+    label 'process_low'
     publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy')
 
     input:
@@ -136,7 +143,7 @@ process mergemut2stats_tonly {
 
 
 process mutect2_t_tonly {
-    
+    label 'process_somaticcaller'
     input:
         tuple val(tumorname), path(tumor), path(tumorbai), path(bed)
     
@@ -150,12 +157,12 @@ process mutect2_t_tonly {
 
     """
     gatk Mutect2 \
-    --reference ${GENOME} \
+    --reference $GENOMEREF \
     --intervals ${bed} \
     --input ${tumor} \
     --tumor-sample ${tumor.simpleName} \
-    --germline-resource ${GNOMAD} \
-    --panel-of-normals ${PON} \
+    $GNOMADGERMLINE \
+    --panel-of-normals $PON \
     --output ${tumor.simpleName}_${bed.simpleName}.tonly.mut2.vcf.gz \
     --f1r2-tar-gz ${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz \
     --independent-mates    
@@ -174,13 +181,16 @@ process mutect2_t_tonly {
 
 
 process mutect2filter_tonly {
+    label 'process_mid'
     publishDir(path: "${outdir}/vcfs/mutect2_tonly", mode: 'copy')
 
     input:
         tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups),path(tumorcontamination)
     output:
-        tuple val(sample), path("${sample}.tonly.mut2.marked.vcf.gz"), 
-        path("${sample}.tonly.mut2.norm.vcf.gz"), path("${sample}.tonly.marked.vcf.gz.filteringStats.tsv")
+        tuple val(sample), 
+        path("${sample}.tonly.mut2.marked.vcf.gz"),path("${sample}.tonly.mut2.marked.vcf.gz.tbi"), 
+        path("${sample}.tonly.mut2.norm.vcf.gz"),path("${sample}.tonly.mut2.norm.vcf.gz.tbi"), 
+        path("${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv")
 
     script:
     //Include the stats and  concat ${mutvcfs} -Oz -o ${sample}.concat.vcf.gz
@@ -191,7 +201,7 @@ process mutect2filter_tonly {
     gatk GatherVcfs -I ${mut2in} -O ${sample}.tonly.concat.vcf.gz 
     gatk IndexFeatureFile -I ${sample}.tonly.concat.vcf.gz 
     gatk FilterMutectCalls \
-        -R ${GENOME} \
+        -R $GENOMEREF \
         -V ${sample}.tonly.concat.vcf.gz \
         --ob-priors ${obs} \
         --contamination-table ${tumorcontamination} \
@@ -199,32 +209,31 @@ process mutect2filter_tonly {
         -O ${sample}.tonly.mut2.marked.vcf.gz
 
     gatk SelectVariants \
-        -R ${GENOME} \
-        --variant ${sample}.tonly.marked.vcf.gz \
+        -R $GENOMEREF \
+        --variant ${sample}.tonly.mut2.marked.vcf.gz \
         --exclude-filtered \
         --output ${sample}.tonly.mut2.final.vcf.gz
 
-    bcftools sort ${sample}.tonly.mut2.final.vcf.gz -@ 16 -Oz |\
-    bcftools norm --threads 16 --check-ref s -f $GENOME -O v |\
+    bcftools sort ${sample}.tonly.mut2.final.vcf.gz |\
+    bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
         awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\t"; print}}' |\
-        sed '/^\$/d' > ${sample}.tonly.mut2.norm.vcf.gz
+        sed '/^\$/d' |\
+    bcftools view - -Oz -o  ${sample}.tonly.mut2.norm.vcf.gz
+    bcftools index -t ${sample}.tonly.mut2.norm.vcf.gz
 
     """
 
     stub:
     """
-    touch ${sample}.tonly.mut2.marked.vcf.gz
-    touch ${sample}.tonly.mut2.norm.vcf.gz
-    touch ${sample}.tonly.marked.vcf.gz.filteringStats.tsv
+    touch ${sample}.tonly.mut2.marked.vcf.gz ${sample}.tonly.mut2.marked.vcf.gz.tbi
+    touch ${sample}.tonly.mut2.norm.vcf.gz ${sample}.tonly.mut2.norm.vcf.gz.tbi
+    touch ${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv
     """
 }
 
 
-
-
-
-
 process varscan_tonly {
+    label 'process_somaticcaller'
     input:
         tuple val(tumorname), path(tumor), path(tumorbai), 
         path(bed),
@@ -232,50 +241,53 @@ process varscan_tonly {
     
     output:
         tuple val(tumorname),
-        path("${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf")
+        path("${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf.gz")
     
     shell:
 
-    """
+    '''
     varscan_opts="--strand-filter 0 --min-var-freq 0.01 --output-vcf 1 --variants 1"
-    pileup_cmd="samtools mpileup -d 100000 -q 15 -Q 15 -f !GENOME !{tumor}"
+    pileup_cmd="samtools mpileup -d 100000 -q 15 -Q 15 -f !{GENOMEREF} !{tumor}"
     varscan_cmd="varscan mpileup2cns <($pileup_cmd) $varscan_opts"
 
+    eval "$varscan_cmd > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf"
 
-    eval "$varscan_cmd > {output.vcf}.gz"
-    eval "bcftools view -U {output.vcf}.gz > {output.vcf}"
-    """
+    printf "!{tumorname}\n" > sampname 
+    # One name per line = positional rename; avoids depending on the sample label VarScan writes (not TUMOR for mpileup2cns)
+    bcftools reheader -s sampname !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf \
+        | bcftools view -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf.gz
+
+    '''
 
     stub:
-    
     """
-    touch ${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf
-    
+    touch ${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf.gz
     """
 
 }
 
+
 process vardict_tonly {
-    
+    label 'process_highcpu'
     input:
         tuple val(tumorname), path(tumor), path(tumorbai), path(bed)
     
     output:
         tuple val(tumorname),
-        path("${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf")
+        path("${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz")
     
     script:
 
     """
-    VarDict -G $GENOME \
-        -f 0.05 \
+    bedtools makewindows -b ${bed} -w 50150 -s 50000 > temp_${bed}
+
+    VarDict -G $GENOMEREF \
+        -f 0.01 \
         -x 500 \
         --nosv \
-        -b ${tumor} \
-        -t -Q 20 -c 1 -S 2 -E 3 \
-        -R ${bed} \
-        | teststrandbias.R \
-        | var2vcf_valid.pl \
+        -b ${tumor} --fisher \
+        -t -Q 20 -c 1 -S 2 -E 3 --th $task.cpus \
+        temp_${bed} | var2vcf_valid.pl \
             -N ${tumor} \
             -Q 20 \
             -d 10 \
@@ -284,25 +296,97 @@ process vardict_tonly {
             -E \
             -f 0.05 >  ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf
 
+    printf "${tumor.Name}\t${tumorname}\n" > sampname 
+    
+    bcftools reheader -s sampname ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf \
+        | bcftools view -Oz -o ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz
+
     """
 
     stub:
     
     """
-    touch ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf
+    touch ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz
+
+    """
+
+}
+
+
+process octopus_tonly {
+    //label 'process_highcpu'
+
+    input:
+        tuple val(tumorname), path(tumor), path(tumorbai), path(bed)
+    
+    output:
+        tuple val(tumorname),
+        path("${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz")
+    
+    script:
 
     """
+    octopus -R $GENOMEREF -C cancer -I ${tumor} \
+    --annotations AC AD DP \
+    --target-working-memory 64Gb \
+    -t ${bed} \
+    $SOMATIC_FOREST \
+    -o ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz --threads $task.cpus
 
 
+    """
+
+    stub:
+    
+    """
+    touch ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz
+    """
 }
 
 
+
+process somaticcombine_tonly {
+    label 'process_mid'
+    publishDir(path: "${outdir}/vcfs/combined_tonly", mode: 'copy')
+
+    input: 
+        tuple val(tumorsample), 
+        val(callers),
+        path(vcfs), path(vcfindex)
+
+    output:
+        tuple val(tumorsample),
+        path("${tumorsample}_combined_tonly.vcf.gz"),
+        path("${tumorsample}_combined_tonly.vcf.gz.tbi")
+
+    script:
+        vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b }
+        vcfin2="-V:" + vcfin1.join(" -V:")
+
+    """
+    java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \
+        -R $GENOMEREF \
+        --genotypeMergeOption PRIORITIZE \
+        --priority_list mutect2_tonly,octopus_tonly,vardict_tonly,varscan_tonly \
+        --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \
+        -O ${tumorsample}_combined_tonly.vcf.gz \
+        $vcfin2
+    """
+
+    stub:
+    """
+    touch ${tumorsample}_combined_tonly.vcf.gz ${tumorsample}_combined_tonly.vcf.gz.tbi
+    """
+
+}
+
 process annotvep_tonly {
     publishDir("${outdir}/mafs", mode: "copy")
 
     input:
         tuple val(tumorsample), 
-        val(vc), path(tumorvcf) 
+        val(vc), path(tumorvcf), 
+        path(vcfindex)
 
 
     output:
@@ -310,19 +394,47 @@ process annotvep_tonly {
 
     shell:
 
-    """
+    '''
+    VCF_SAMPLE_IDS=($(bcftools query -l !{tumorvcf}))
+    TID_IDX=0
+    NID_IDX=""
+    VCF_NID=""
+    NORM_VCF_ID_ARG=""
+    NSAMPLES=${#VCF_SAMPLE_IDS[@]}
+    if [ $NSAMPLES -gt 1 ]; then
+        # Assign tumor, normal IDs 
+        # Look through column names and 
+        # see if they match provided IDs
+        for (( i = 0; i < $NSAMPLES; i++ )); do
+            echo "${VCF_SAMPLE_IDS[$i]}"
+            if [ "${VCF_SAMPLE_IDS[$i]}" == !{tumorsample} ]; then
+                TID_IDX=$i
+            fi
+            
+        done
+
+        if [ ! -z $NID_IDX ]; then
+            VCF_NID=${VCF_SAMPLE_IDS[$NID_IDX]}
+            NORM_VCF_ID_ARG="--vcf-normal-id $VCF_NID"
+        fi
+    fi
+    VCF_TID=${VCF_SAMPLE_IDS[$TID_IDX]}
+   
+    zcat !{tumorvcf} > !{tumorvcf.baseName}
     
-    zcat !{tumorvcf}.vcf.gz > !{tumorvcf}.vcf
+    mkdir -p tumor_only/!{vc}
 
     vcf2maf.pl \
-    --vep-forks 16 --input-vcf !{tumorvcf}.vcf \
-    --output-maf !{vc}/!{tumorsample}.tonly.maf \
+    --vep-forks !{task.cpus} --input-vcf !{tumorvcf.baseName} \
+    --output-maf tumor_only/!{vc}/!{tumorsample}.tonly.maf \
     --tumor-id !{tumorsample} \
     --vep-path /opt/vep/src/ensembl-vep \
-    --vep-data $VEP_CACHEDIR \
-    --ncbi-build GRCh38 --species homo_sapiens --ref-fasta !{GENOME}
+    --vep-data !{VEPCACHEDIR} \
+    --ncbi-build !{VEPBUILD} --species !{VEPSPECIES} --ref-fasta !{GENOMEREF} \
+    --vep-overwrite
 
-    """
+
+    '''
 
     stub:
     """
@@ -332,6 +444,7 @@ process annotvep_tonly {
 }
 
 process combinemafs_tonly {
+    label 'process_low'
     publishDir(path: "${outdir}/mafs/tumor_only", mode: 'copy')
 
     input: 

From cfdfe4c40b82c948cbfa455f3b5a431c144a74a6 Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Mon, 11 Dec 2023 15:16:51 -0500
Subject: [PATCH 05/58] fix: publish mode

---
 nextflow.config | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/nextflow.config b/nextflow.config
index 1581cff..1915606 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -61,6 +61,9 @@ params {
     fastq_input=null
     bam_input=null
     file_input=null
+
+    publish_dir_mode = 'symlink'
+
 }
 
 

From 46091f75090dfeebc4fbcb346b3ae7b8b5adfde3 Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Mon, 11 Dec 2023 15:20:50 -0500
Subject: [PATCH 06/58] fix: change output dir

---
 main.nf                                | 2 +-
 modules/local/copynumber.nf            | 1 -
 modules/local/qc.nf                    | 2 --
 modules/local/structural_variant.nf    | 1 -
 modules/local/trim_align.nf            | 1 -
 modules/local/variant_calling.nf       | 2 --
 modules/local/variant_calling_tonly.nf | 4 ----
 nextflow.config                        | 1 +
 8 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/main.nf b/main.nf
index 4babf3a..e66c6ea 100644
--- a/main.nf
+++ b/main.nf
@@ -31,7 +31,7 @@ log.info """\
          W G S S E E K   P I P E L I N E    
          =============================
          genome: ${params.genome}
-         outdir: ${params.output}
+         outdir: ${params.outdir}
          Samplesheet: ${params.sample_sheet}
          Samples: ${params.fastq_input} ${params.file_input} ${params.bam_input}
          """
diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf
index b15a8c2..392270e 100644
--- a/modules/local/copynumber.nf
+++ b/modules/local/copynumber.nf
@@ -24,7 +24,6 @@ HOTSPOTS='/data/SCLC-BRAINMETS/cn/variants/KnownHotspots.somatic.38.vcf.gz'
 //DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) 
 //ascatR=
 
-outdir=file(params.output)
 
 //mm10 Paired-Sequenza, FREEC-tumor only 
 process seqz_sequenza_bychr {
diff --git a/modules/local/qc.nf b/modules/local/qc.nf
index 65515e8..dd2bdfa 100644
--- a/modules/local/qc.nf
+++ b/modules/local/qc.nf
@@ -16,8 +16,6 @@ SCRIPT_PATH_PCA = file(params.script_ancestry)
     
 
 //OUTPUT DIRECTORY 
-outdir=file(params.output)
-
 process fc_lane {
     label 'process_low'
     publishDir("${outdir}/QC/fc_lane/", mode:'copy')
diff --git a/modules/local/structural_variant.nf b/modules/local/structural_variant.nf
index d807753..6182624 100644
--- a/modules/local/structural_variant.nf
+++ b/modules/local/structural_variant.nf
@@ -3,7 +3,6 @@ GENOME=params.genome
 BWAGENOME=file(params.genomes[params.genome].bwagenome)
 DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) 
 
-outdir=file(params.output)
 
 
 process svaba_somatic {
diff --git a/modules/local/trim_align.nf b/modules/local/trim_align.nf
index fefe243..19dfa81 100644
--- a/modules/local/trim_align.nf
+++ b/modules/local/trim_align.nf
@@ -1,6 +1,5 @@
 GENOMEREF=file(params.genomes[params.genome].genome)
 KNOWNRECAL = params.genomes[params.genome].KNOWNRECAL
-outdir=file(params.output)
 
 
 process fastp {
diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf
index d7b3bf9..9d7892e 100644
--- a/modules/local/variant_calling.nf
+++ b/modules/local/variant_calling.nf
@@ -12,8 +12,6 @@ SOMATIC_FOREST=params.genomes[params.genome].octopus_sforest
 GERMLINE_FOREST=params.genomes[params.genome].octopus_gforest
 LOFREQ_CONVERT=params.lofreq_convert
 
-//Output
-outdir=file(params.output)
 
 
 process mutect2 {
diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf
index 3d67e26..a252597 100644
--- a/modules/local/variant_calling_tonly.nf
+++ b/modules/local/variant_calling_tonly.nf
@@ -11,10 +11,6 @@ VEPBUILD=params.genomes[params.genome].vepbuild
 SOMATIC_FOREST=params.genomes[params.genome].octopus_sforest
 GERMLINE_FOREST=params.genomes[params.genome].octopus_gforest
 
-//Output
-outdir=file(params.output)
-
-
 
 process pileup_paired_tonly {
     label 'process_highmem'
diff --git a/nextflow.config b/nextflow.config
index 1915606..48b8d2c 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -63,6 +63,7 @@ params {
     file_input=null
 
     publish_dir_mode = 'symlink'
+    outdir = 'results'
 
 }
 

From f74061e7b33fa512a1640b1ac30c584d288ce9bb Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Mon, 11 Dec 2023 15:36:53 -0500
Subject: [PATCH 07/58] fix: corrected the pipeline order

---
 main.nf | 156 +++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 115 insertions(+), 41 deletions(-)

diff --git a/main.nf b/main.nf
index e66c6ea..e1e7422 100644
--- a/main.nf
+++ b/main.nf
@@ -6,21 +6,36 @@ date = new Date().format( 'yyyyMMdd' )
 
 //SUB WORKFLOWS to SPLIT
 PIPE_ALIGN=params.PIPE_ALIGN
-PIPE_GERMLINE=params.PIPE_GERMLINE
+
 PIPE_VC=params.PIPE_VC
 PIPE_SV=params.PIPE_SV
-PIPE_QC=params.PIPE_QC
-PIPE_BAMVC=params.PIPE_BAMVC
+PIPE_CNV=params.PIPE_CNV
+
+PIPE_QC_GL=params.PIPE_QC_GL
+PIPE_QC_NOGL=params.PIPE_QC_NOGL
+
+PIPE_GL=params.PIPE_GL
+
 PIPE_TONLY_ALIGN=params.PIPE_TONLY_ALIGN
 PIPE_TONLY_VC=params.PIPE_TONLY_VC
-PIPE_TONLY_BAMVC=params.PIPE_TONLY_BAMVC
+PIPE_TONLY_SV=params.PIPE_TONLY_SV
+PIPE_TONLY_CNV=params.PIPE_TONLY_CNV
 PIPE_TONLY_QC=params.PIPE_TONLY_QC
 
 
+PIPE_BAMVC=params.PIPE_BAMVC
+PIPE_BAMSV=params.PIPE_BAMSV //own flag: BAM-input SV calling must toggle independently of BAM-input CNV
+PIPE_BAMCNV=params.PIPE_BAMCNV
+
+PIPE_TONLY_BAMVC=params.PIPE_TONLY_BAMVC
+PIPE_TONLY_BAMSV=params.PIPE_TONLY_BAMSV
+PIPE_TONLY_BAMCNV=params.PIPE_TONLY_BAMCNV
+
+
 
-include {INPUT_PIPE;TRIM_ALIGN_PIPE;
-    GERMLINE_PIPE;VARIANTCALL_PIPE;INPUT_BAMVC_PIPE;SV_PIPE;
-    QC_PIPE} from "./subworkflows/local/workflows.nf"
+include {INPUT; ALIGN; GL;
+    VC; INPUT_BAM; SV; CNVmouse; CNVhuman;
+    QC_GL; QC_NOGL} from "./subworkflows/local/workflows.nf"
 
 include {INPUT_TONLY; INPUT_TONLY_BAM;
     ALIGN_TONLY;
@@ -46,69 +61,128 @@ workflow.onComplete {
     }
 }
 
+//Each PIPE_* flag below selects one end-to-end pipeline to run
 //Final Workflow
 workflow {
 
     if (PIPE_ALIGN){
-        INPUT_PIPE()
-        TRIM_ALIGN_PIPE(INPUT_PIPE.out.fastqinput,INPUT_PIPE.out.sample_sheet)
+        INPUT()
+        ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet)
     } 
-
-    //GermlineVC 
-    if (PIPE_GERMLINE){
-        INPUT_PIPE()
-        TRIM_ALIGN_PIPE(INPUT_PIPE.out.fastqinput,INPUT_PIPE.out.sample_sheet)
-        GERMLINE_PIPE(TRIM_ALIGN_PIPE.out.bambyinterval)
+    //Germline
+    if (PIPE_GL){
+        INPUT()
+        ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet)
+        GL(ALIGN.out.bambyinterval)
     }
 
     //Tumor-Normal Pipelines
     if (PIPE_VC){
-        INPUT_PIPE()
-        TRIM_ALIGN_PIPE(INPUT_PIPE.out.fastqinput,INPUT_PIPE.out.sample_sheet)
-        VARIANTCALL_PIPE(TRIM_ALIGN_PIPE.out.bamwithsample,TRIM_ALIGN_PIPE.out.splitout,TRIM_ALIGN_PIPE.out.sample_sheet)
+        INPUT()
+        ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet)
+        VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet)
     }
-    if (PIPE_QC){
-        INPUT_PIPE()
-        TRIM_ALIGN_PIPE(INPUT_PIPE.out.fastqinput,INPUT_PIPE.out.sample_sheet)
-        GERMLINE_PIPE(TRIM_ALIGN_PIPE.out.bambyinterval)
-        QC_PIPE(TRIM_ALIGN_PIPE.out.fastqin,TRIM_ALIGN_PIPE.out.fastpout,TRIM_ALIGN_PIPE.out.bwamem2out,GERMLINE_PIPE.out.glnexusout,GERMLINE_PIPE.out.bcfout)
-
+    if (PIPE_QC_GL){
+        INPUT()
+        ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet)
+        GL(ALIGN.out.bambyinterval)
+        QC_GL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout,GL.out.glnexusout,GL.out.bcfout)
+    }  
+    if (PIPE_QC_NOGL){
+        INPUT()
+        ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet)
+        QC_NOGL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout)
     }  
     if (PIPE_SV){
-        INPUT_PIPE()
-        TRIM_ALIGN_PIPE(INPUT_PIPE.out.fastqinput,INPUT_PIPE.out.sample_sheet)
-        SV_PIPE(TRIM_ALIGN_PIPE.out.bamwithsample)
+        INPUT()
+        ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet)
+        SV(ALIGN.out.bamwithsample)
+    }  
+    if (PIPE_CNV){
+        INPUT()
+        ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet)
+        if (params.genome == "mm10"){
+            CNVmouse(ALIGN.out.bamwithsample)
+        } else if (params.genome== "hg38"){
+            VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet)
+            CNVhuman(ALIGN.out.bamwithsample,VC.out.somaticcall_input)
 
+        }
     }  
     if (PIPE_BAMVC){
-        INPUT_BAMVC_PIPE()
-        VARIANTCALL_PIPE(INPUT_BAMVC_PIPE.out.bamwithsample,INPUT_BAMVC_PIPE.out.splitout,INPUT_BAMVC_PIPE.out.sample_sheet)
+        INPUT_BAM()
+        VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet)
+    }  
+    if (PIPE_BAMSV){
+        INPUT_BAM()
+        SV(INPUT_BAM.out.bamwithsample)
+    }  
+    if (PIPE_BAMCNV){
+        INPUT_BAM()
+        if (params.genome == "mm10"){
+            CNVmouse(INPUT_BAM.out.bamwithsample)
+        } else if (params.genome== "hg38"){
+            VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet)
+            CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input)
+
+        }
     }  
 
 
     ///Tumor Only Pipelines
     if (PIPE_TONLY_ALIGN){
-        INPUT_TONLY_PIPE()
-        TRIM_ALIGN_TONLY_PIPE(INPUT_TONLY_PIPE.out.fastqinput,INPUT_TONLY_PIPE.out.sample_sheet)
+        INPUT_TONLY()
+        ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet)
     }
     if (PIPE_TONLY_VC){
-        INPUT_TONLY_PIPE()
-        TRIM_ALIGN_TONLY_PIPE(INPUT_TONLY_PIPE.out.fastqinput,INPUT_TONLY_PIPE.out.sample_sheet)
-        VARIANT_TONLY_PIPE(TRIM_ALIGN_TONLY_PIPE.out.bamwithsample,TRIM_ALIGN_TONLY_PIPE.out.splitout,TRIM_ALIGN_TONLY_PIPE.out.sample_sheet)
+        INPUT_TONLY()
+        ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet)
+        VC_TONLY(ALIGN_TONLY.out.bamwithsample,ALIGN_TONLY.out.splitout,ALIGN_TONLY.out.sample_sheet)
     }    
-    if (PIPE_TONLY_QC){
-        INPUT_TONLY_PIPE()
-        TRIM_ALIGN_TONLY_PIPE(INPUT_TONLY_PIPE.out.fastqinput,INPUT_TONLY_PIPE.out.sample_sheet)
-        QC_TONLY_PIPE(TRIM_ALIGN_TONLY_PIPE.out.fastqin,TRIM_ALIGN_TONLY_PIPE.out.fastpout,TRIM_ALIGN_TONLY_PIPE.out.bqsrout)
+    if (PIPE_TONLY_SV){
+        INPUT_TONLY()
+        ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet)
+        SV_TONLY(ALIGN_TONLY.out.bamwithsample)
+    }   
+    if (PIPE_TONLY_CNV){
+        INPUT_TONLY()
+        ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet)
+        if (params.genome == "mm10"){
+            CNVmouse_tonly(ALIGN_TONLY.out.bamwithsample)
+        } else if (params.genome== "hg38"){
+            VC_TONLY(ALIGN_TONLY.out.bamwithsample,ALIGN_TONLY.out.splitout,ALIGN_TONLY.out.sample_sheet)
+            CNVhuman_tonly(ALIGN_TONLY.out.bamwithsample,VC_TONLY.out.somaticcall_input)
 
+        }
     }  
 
-    //Variant Calling from BAM only/Tumor Only
+    if (PIPE_TONLY_QC){
+        INPUT_TONLY()
+        ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet)
+        QC_TONLY(ALIGN_TONLY.out.fastqin,ALIGN_TONLY.out.fastpout,ALIGN_TONLY.out.bqsrout)
+
+    }  
+    //Variant Calling from BAM-Tumor Only Mode
     if (PIPE_TONLY_BAMVC){
-        INPUT_TONLY_BAMVC_PIPE()
-        VARIANT_TONLY_PIPE(INPUT_TONLY_BAMVC_PIPE.out.bamwithsample,INPUT_TONLY_BAMVC_PIPE.out.splitout,INPUT_TONLY_BAMVC_PIPE.out.sample_sheet)
+        INPUT_TONLY_BAM()
+        VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet)
+    }
+    if (PIPE_TONLY_BAMSV){
+        INPUT_TONLY_BAM()
+        SV_TONLY(INPUT_TONLY_BAM.out.bamwithsample)
+    }  
+    if (PIPE_TONLY_BAMCNV){
+        INPUT_TONLY_BAM()
+        if (params.genome == "mm10"){
+            CNVmouse_tonly(INPUT_TONLY_BAM.out.bamwithsample)
+        }else if (params.genome== "hg38"){
+            VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet)
+            CNVhuman_tonly(INPUT_TONLY_BAM.out.bamwithsample,VC_TONLY.out.somaticcall_input)
+
+        }
     }  
 }
+
     
 
 

From 4c045cdc4c22b7d22ec3518bcf551b8ded3dee0e Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Mon, 11 Dec 2023 15:40:03 -0500
Subject: [PATCH 08/58] fix: output rename

---
 modules/local/germline.nf | 2 --
 1 file changed, 2 deletions(-)

diff --git a/modules/local/germline.nf b/modules/local/germline.nf
index d3544a5..6896f68 100644
--- a/modules/local/germline.nf
+++ b/modules/local/germline.nf
@@ -1,8 +1,6 @@
 GENOMEREF=file(params.genomes[params.genome].genome)
 MODEL="/opt/models/wgs/model.ckpt"
 
-//Output Directory
-outdir=file(params.output)
 
 //Processes
 //Deep Variant

From 1431f64187f121c5b0f14c44af5718ffd84baf1c Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Mon, 11 Dec 2023 15:40:45 -0500
Subject: [PATCH 09/58] fix: copy number

---
 subworkflows/local/workflows.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf
index 96713b5..d9dab2d 100644
--- a/subworkflows/local/workflows.nf
+++ b/subworkflows/local/workflows.nf
@@ -46,7 +46,7 @@ include {svaba_somatic; manta_somatic;
     annotsv_tn as annotsv_svaba;annotsv_tn as annotsv_manta} from '../../modules/local/structural_variant.nf'
 
 include {amber_tn; cobalt_tn; purple;
-    sequenza; seqz_sequenza_bychr; freec; freec_paired } from './copynumber.nf'
+    sequenza; seqz_sequenza_bychr; freec; freec_paired } from '../../modules/local/copynumber.nf'
 
 include {splitinterval} from '../../modules/local/splitbed.nf'
 

From 38465d6f2becdfd54ddd384f23f904142197b5b1 Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Mon, 11 Dec 2023 15:41:56 -0500
Subject: [PATCH 10/58] fix: sv location

---
 subworkflows/local/workflows_tonly.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf
index 07c274c..ca5eb5f 100644
--- a/subworkflows/local/workflows_tonly.nf
+++ b/subworkflows/local/workflows_tonly.nf
@@ -41,9 +41,9 @@ include {mutect2_t_tonly; mutect2filter_tonly; pileup_paired_tonly;
 
 include {manta_tonly; svaba_tonly; survivor_sv; gunzip;
 annotsv_tonly as annotsv_manta_tonly; annotsv_tonly as annotsv_svaba_tonly;
-annotsv_tonly as annotsv_survivor_tonly} from './structural_variant.nf'
+annotsv_tonly as annotsv_survivor_tonly} from '../../modules/local/structural_variant.nf'
 
-include {freec; amber_tonly; cobalt_tonly; purple  } from './copynumber.nf'
+include {freec; amber_tonly; cobalt_tonly; purple  } from '../../modules/local/copynumber.nf'
 
 include {splitinterval} from '../../modules/local/splitbed.nf'
 

From e1675b79c6142da256fdab829d008bd8d6508947 Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Mon, 11 Dec 2023 15:43:35 -0500
Subject: [PATCH 11/58] fix: hg38 location

---
 conf/genomes.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/genomes.config b/conf/genomes.config
index a7810a5..2ee6cdc 100644
--- a/conf/genomes.config
+++ b/conf/genomes.config
@@ -6,7 +6,7 @@ params {
             bwagenome= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.fasta"
             genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict"
             wgsregion = "/data/nousomedr/annotation/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" 
-            intervals="${projectDir}/workflow/resources/hg38_v0_wgs_calling_regions.hg38.bed"
+            intervals= "${projectDir}/assets/hg38_v0_wgs_calling_regions.hg38.bed"
             //millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
             //shapeitindel =  "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) //
             KNOWNINDELS= '/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_indel.vcf'

From b640a4534edab083199a5c12302551afc000db26 Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Mon, 11 Dec 2023 15:47:15 -0500
Subject: [PATCH 12/58] refactor: moved and all linked scripts

---
 nextflow.config | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 48b8d2c..c3b84d4 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -16,19 +16,20 @@ includeConfig 'conf/containers.config'
 
 params { 
 
-    fastq_screen_conf= "${projectDir}/workflow/resources/fastq_screen.conf"
-    get_flowcell_lanes="${projectDir}/workflow/scripts/flowcell_lane.py"
-    splitbed="${projectDir}/workflow/resources/split_Bed_into_equal_regions.py"
+    fastq_screen_conf = "${projectDir}/conf/fastq_screen.conf"
+    get_flowcell_lanes = "${projectDir}/bin/scripts/flowcell_lane.py"
+    splitbed= "${projectDir}/bin/split_Bed_into_equal_regions.py"
+    script_genderPrediction = "${projectDir}/bin/RScripts/predictGender.R"
+    script_combineSamples = "${projectDir}/bin/combineAllSampleCompareResults.R"
+    script_ancestry = "${projectDir}/bin/sampleCompareAncestoryPlots.R"
+    script_sequenza = "${projectDir}/bin/run_sequenza.R"
+    script_freec = "${projectDir}/bin/make_freec_genome.pl"
+    script_freecpaired = "${projectDir}/bin/freec_paired.pl"
+    freec_significance = "${projectDir}/bin/assess_significance.R"
+    freec_plot = "${projectDir}/bin/makeGraph.R"
+    lofreq_convert = "${projectDir}/bin/add_gt_lofreq.sh"
     split_regions = "36" //Number of regions to split by 
-    script_genderPrediction = "${projectDir}/workflow/scripts/RScripts/predictGender.R"
-    script_combineSamples = "${projectDir}/workflow/scripts/RScripts/combineAllSampleCompareResults.R"
-    script_ancestry = "${projectDir}/workflow/scripts/RScripts/sampleCompareAncestoryPlots.R"
-    script_sequenza = "${projectDir}/workflow/scripts/RScripts/run_sequenza.R"
-    script_freec = "${projectDir}/workflow/scripts/make_freec_genome.pl"
-    script_freecpaired = "${projectDir}/workflow/scripts/freec_paired.pl"
-    freec_significance = "${projectDir}/workflow/scripts/assess_significance.R"
-    freec_plot = "${projectDir}/workflow/scripts/makeGraph.R"
-    lofreq_convert = "${projectDir}/workflow/scripts/add_gt_lofreq.sh"
+
     vep_cache = "/fdb/VEP/102/cache"
 
     //SUB WORKFLOWS to SPLIT

From 4dbed734e2299365c42f78089986ed2b7b7a3a26 Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Mon, 11 Dec 2023 15:49:18 -0500
Subject: [PATCH 13/58] fix: disable singularity for stub

---
 conf/ci_stub.config | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/conf/ci_stub.config b/conf/ci_stub.config
index 6273277..0fc4817 100644
--- a/conf/ci_stub.config
+++ b/conf/ci_stub.config
@@ -15,4 +15,8 @@ params {
 process {
     cpus = 1
     memory = '1.GB'
 }
+// Container engines are configured in their own top-level scope, not as a `process` setting
+singularity {
+    enabled = false
+}

From c0354b1c55d32e77d48e3b6364f8681ee06d9667 Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Mon, 11 Dec 2023 15:53:46 -0500
Subject: [PATCH 14/58] fix: lower mem for stub

---
 conf/ci_stub.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/ci_stub.config b/conf/ci_stub.config
index 0fc4817..808f53f 100644
--- a/conf/ci_stub.config
+++ b/conf/ci_stub.config
@@ -6,7 +6,7 @@ params {
     outdir = 'results/test'
 
     max_cpus = 2        // for GitHub Actions https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources
-    max_memory = '6.GB'
+    max_memory = '4.GB'
     max_time   = '6.h'
 
     publish_dir_mode = "symlink"

From defaf906fe851f8bfc5e01d18245942e41cce127 Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Mon, 11 Dec 2023 16:57:41 -0500
Subject: [PATCH 15/58] refactor: moved publishdir to module.config

---
 conf/modules.config                    | 280 +++++++++++++++++++++++++
 modules/local/copynumber.nf            |   7 -
 modules/local/germline.nf              |  10 +-
 modules/local/qc.nf                    |  32 +--
 modules/local/structural_variant.nf    |   7 -
 modules/local/trim_align.nf            |   4 +-
 modules/local/variant_calling.nf       |  13 --
 modules/local/variant_calling_tonly.nf |   5 -
 8 files changed, 285 insertions(+), 73 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index ff3e484..3c8a778 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -8,4 +8,284 @@ process {
 
     errorStrategy = 'finish'
 
+    withName: sequenza {
+        publishDir = [
+            path: { "${params.outdir}/cnv/sequenza" },
+            mode: 'copy'
+        ]
+    }
+
+
+    withName: freec_paired {
+        publishDir = [
+            path: { "${params.outdir}/cnv/freec" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: freec {
+        publishDir = [
+            path: { "${params.outdir}/cnv/freec" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'amber_tonly|amber_tn' {
+        publishDir = [
+            path: { "${params.outdir}/cnv/amber" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'cobalt_tonly|cobalt_tn' {
+        publishDir = [
+            path: { "${params.outdir}/cnv/cobalt" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'purple' {
+        publishDir = [
+            path: { "${params.outdir}/cnv/purple" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'deepvariant_step3|deepvariant_combined' {
+        publishDir = [path: { "${params.outdir}/germline/deepvariant" }, mode: 'copy']
+        module = ['deepvariant/1.4.0']
+    }
+    withName: 'glnexus' {
+        publishDir = [path: { "${params.outdir}/germline/deepvariant" }, mode: 'copy']
+        module = ['glnexus', 'bcftools']  // not deepvariant: keeps the modules glnexus declared in germline.nf
+    }
+    withName: 'deepvariant_step1|deepvariant_step2' {
+        module = ['deepvariant/1.4.0']
+    }
+
+    withName: 'fc_lane' {
+        publishDir = [
+            path: { "${params.outdir}/QC/fc_lane" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'fastq_screen' {
+        publishDir = [
+            path: { "${params.outdir}/QC/fastq_screen" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'kraken' {
+        publishDir = [
+            path: { "${params.outdir}/QC/kraken" },
+            mode: 'copy'
+        ]
+    }
+
+
+    withName: 'fastqc' {
+        publishDir = [
+            path: { "${params.outdir}/QC/fastqc" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'qualimap|qualimap_bamqc' {
+        publishDir = [
+            path: { "${params.outdir}/QC/qualimap" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'samtools_flagstats' {
+        publishDir = [
+            path: { "${params.outdir}/QC/samtools_flagstats" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'mosdepth' {
+        publishDir = [
+            path: { "${params.outdir}/QC/mosdepth" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'vcftools' {
+        publishDir = [
+            path: { "${params.outdir}/QC/vcftools" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'collectvariantcallmetrics' {
+        publishDir = [
+            path: { "${params.outdir}/QC/collectvariantcallmetrics" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'bcftools_stats' {
+        publishDir = [
+            path: { "${params.outdir}/QC/bcftools_stat" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'gatk_varianteval' {
+        publishDir = [
+            path: { "${params.outdir}/QC/gatk_varianteval" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'snpeff' {
+        publishDir = [
+            path: { "${params.outdir}/QC/snpeff" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'somalier_extract|somalier_analysis_human|somalier_analysis_mouse' {
+        publishDir = [
+            path: { "${params.outdir}/QC/somalier" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'multiqc' {
+        publishDir = [
+            path: { "${params.outdir}/QC/multiqc" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'svaba_somatic' {
+        publishDir = [
+            path: { "${params.outdir}/SV/svaba" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'svaba_tonly' {
+        publishDir = [
+            path: { "${params.outdir}/SV/svaba_tonly" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'manta_somatic' {
+        publishDir = [
+            path: { "${params.outdir}/SV/manta" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'manta_tonly' {
+        publishDir = [
+            path: { "${params.outdir}/SV/manta_tonly" },
+            mode: 'copy'
+        ]
+    }
+      
+    withName: 'annotsv_tn' {
+        publishDir = [
+            path: { "${params.outdir}/SV/annotated" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'annotsv_tonly' {
+        publishDir = [
+            path: { "${params.outdir}/SV/annotated_tonly" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'survivor_sv' {
+        publishDir = [
+            path: { "${params.outdir}/SV/survivor" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'fastp' {
+        publishDir = [
+            path: { "${params.outdir}/QC/fastp" },
+            mode: 'copy',
+            pattern: '{*fastp.json,*fastp.html}'
+        ]
+    }
+
+    withName: 'applybqsr|samtoolsindex' {
+        publishDir = [
+            path: { "${params.outdir}/bams/BQSR" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'contamination_tumoronly|learnreadorientationmodel_tonly|learnreadorientationmodel|mergemut2stats|mergemut2stats_tonly|contamination_paired|mutect2filter' {
+        publishDir = [
+            path: { "${params.outdir}/vcfs/mutect2" },
+            mode: 'copy'
+        ]
+    }
+
+ withName: 'mutect2filter_tonly' {
+        publishDir = [
+            path: { "${params.outdir}/vcfs/mutect2_tonly" },
+            mode: 'copy'
+        ]
+    }
+
+ withName: 'annotvep_tonly|annotvep_tn' {
+        publishDir = [
+            path: { "${params.outdir}/mafs" },
+            mode: 'copy'
+        ]
+    }
+
+ withName: 'combinemafs_tonly' {
+        publishDir = [
+            path: { "${params.outdir}/mafs/tumor_only" },
+            mode: 'copy'
+        ]
+    }
+
+ withName: 'combinemafs_tn' {
+        publishDir = [
+            path: { "${params.outdir}/mafs/paired" },
+            mode: 'copy'
+        ]
+    }
+
+ withName: 'combineVariants|combineVariants_alternative' {
+        publishDir = [
+            path: { "${params.outdir}/vcfs" },
+            mode: 'copy'
+        ]
+    }
+
+ withName: 'combineVariants_strelka' {
+        publishDir = [
+            path: { "${params.outdir}/vcfs/strelka" },
+            mode: 'copy'
+        ]
+    }
+
+ withName: 'somaticcombine_tonly' {
+        publishDir = [
+            path: { "${params.outdir}/vcfs/combined_tonly" },
+            mode: 'copy'
+        ]
+    }
+
+  withName: 'somaticcombine' {
+        publishDir = [
+            path: { "${params.outdir}/vcfs/combined" },
+            mode: 'copy'
+        ]
+    }   
 }
diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf
index 392270e..757465a 100644
--- a/modules/local/copynumber.nf
+++ b/modules/local/copynumber.nf
@@ -58,7 +58,6 @@ process seqz_sequenza_bychr {
 
 process sequenza {
     label 'process_highcpu'
-    publishDir("${outdir}/cnv/sequenza", mode: 'copy')
 
     input:
         tuple val(pairid), path(seqz)
@@ -124,7 +123,6 @@ process sequenza {
 
 process freec_paired {
     label 'process_highcpu'
-    publishDir("${outdir}/cnv/freec", mode: 'copy')
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai)
@@ -171,7 +169,6 @@ process freec_paired {
 
 process freec {
     label 'process_mid'
-    publishDir("${outdir}/cnv/freec", mode: 'copy')
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai)
@@ -217,7 +214,6 @@ process freec {
 
 process amber_tonly {
     label 'process_mid'
-    publishDir("${outdir}/cnv/amber", mode: 'copy')
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai)
@@ -253,7 +249,6 @@ process amber_tonly {
 
 process amber_tn {
     label 'process_mid'
-    publishDir("${outdir}/cnv/amber", mode: 'copy')
     
     input:
         tuple val(tumorname), path(tumor), path(tumorbai),
@@ -290,7 +285,6 @@ process amber_tn {
 
 process cobalt_tonly {
     label "process_mid"
-    publishDir("${outdir}/cnv/cobalt", mode: 'copy')
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai)
@@ -324,7 +318,6 @@ process cobalt_tonly {
 
 process cobalt_tn {
     label "process_mid"
-    publishDir("${outdir}/cnv/cobalt", mode: 'copy')
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai),
diff --git a/modules/local/germline.nf b/modules/local/germline.nf
index 6896f68..c106683 100644
--- a/modules/local/germline.nf
+++ b/modules/local/germline.nf
@@ -5,7 +5,6 @@ MODEL="/opt/models/wgs/model.ckpt"
 //Processes
 //Deep Variant
 process deepvariant_step1 {
-    module=['deepvariant/1.4.0']
     
     input:
         tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path(bed)
@@ -41,7 +40,6 @@ process deepvariant_step1 {
 //Step 2 requires GPU
 process deepvariant_step2 {
     
-    module=['deepvariant/1.4.0']
     
     input:
         tuple val(samplename), path(tfrecords), path(tfgvcf)
@@ -70,9 +68,7 @@ process deepvariant_step2 {
 
 //Step 3 DV
 process deepvariant_step3 {
-    publishDir("${outdir}/deepvariant", mode: 'copy')
 
-    module=['deepvariant/1.4.0']
     
     input:
         tuple val(samplename), path(tfrecords), path("${samplename}_call_variants_output.tfrecord.gz"),
@@ -104,9 +100,7 @@ process deepvariant_step3 {
 
 //Combined DeepVariant
 process deepvariant_combined {
-    module=['deepvariant/1.4.0']
 
-    publishDir("${outdir}/deepvariant", mode: 'copy')
 
     input:
         tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai")
@@ -140,9 +134,7 @@ process deepvariant_combined {
 
 process glnexus {
 
-    module=['glnexus','bcftools']
-
-    publishDir("${outdir}/deepvariant", mode: 'copy')    
+ 
     input:
         path(gvcfs)
     
diff --git a/modules/local/qc.nf b/modules/local/qc.nf
index dd2bdfa..82bcc1a 100644
--- a/modules/local/qc.nf
+++ b/modules/local/qc.nf
@@ -18,7 +18,6 @@ SCRIPT_PATH_PCA = file(params.script_ancestry)
 //OUTPUT DIRECTORY 
 process fc_lane {
     label 'process_low'
-    publishDir("${outdir}/QC/fc_lane/", mode:'copy')
 
     input:
         tuple val(samplename), path(fqs)
@@ -46,7 +45,6 @@ process fc_lane {
 process fastq_screen {
     //Uses Trimmed Files
 
-    publishDir(path: "${outdir}/QC/fastq_screen/", mode:'copy')
 
     input:
     tuple val(samplename),
@@ -96,7 +94,6 @@ process kraken {
     @Output:
         Kraken logfile and interative krona report
     */
-    publishDir(path: "${outdir}/QC/kraken/", mode: 'copy')
     
     input:
         tuple val(samplename), 
@@ -148,7 +145,6 @@ process fastqc {
         FastQC report and zip file containing sequencing quality information
     """
 
-    publishDir(path: "${outdir}/QC/fastqc/", mode: 'copy')
 
     input:
         tuple val(samplename), path("${samplename}.bqsr.bam"), path("${samplename}.bqsr.bai")
@@ -186,12 +182,8 @@ process qualimap_bamqc {
         Recalibrated BAM file (scatter)
     @Output:
         Report containing post-aligment quality-control metrics
-    */
-    publishDir("${outdir}/QC/qualimap/", mode: "copy")
-    
-    //module=['qualimap/2.2.1','java/12.0.1']
-    //module: config['images']['qualimap']
-    
+    */    
+
     input:
         tuple val(samplename), path(bam), path(bai)
 
@@ -233,8 +225,6 @@ process samtools_flagstats {
         Text file containing alignment statistics
     */
     label 'process_mid'
-
-    publishDir("${outdir}/QC/flagstats/", mode: "copy")
     
     input:
         tuple val(samplename), path(bam), path(bai)
@@ -268,9 +258,6 @@ process mosdepth {
         `{prefix}.quantized.bed.gz` (if --quantize is specified)
         `{prefix}.thresholds.bed.gz` (if --thresholds is specified)
     */
-
-    publishDir("${outdir}/QC/mosdepth/", mode: "copy")
-
     input:
         tuple val(samplename), path(bam), path(bai)
     
@@ -309,7 +296,6 @@ process vcftools {
     */
     label 'process_mid'
 
-    publishDir(path:"${outdir}/QC/vcftools", mode: 'copy')
     
     input: 
         tuple path(germlinevcf),path(germlinetbi)
@@ -338,9 +324,7 @@ process collectvariantcallmetrics {
         Multi-sample gVCF file (indirect-gather-due-to-aggregation)
     @Output:
         Text file containing a collection of metrics relating to snps and indels 
-    */
-    publishDir("${outdir}/QC/variantmetrics", mode: 'copy')
-    
+    */    
     input: 
         tuple path(germlinevcf),path(germlinetbi)
     
@@ -381,7 +365,6 @@ process bcftools_stats {
     */
 
     label 'process_mid'
-    publishDir("${outdir}/QC/bcftoolsstat", mode: 'copy')
 
     input:
         tuple val(samplename),  path("${samplename}.gvcf.gz"),path("${samplename}.gvcf.gz.tbi")
@@ -415,8 +398,6 @@ process gatk_varianteval {
     */
     label 'process_mid'
 
-    publishDir("${outdir}/QC/gatk_varianteval", mode: 'copy')
-
     input: 
         tuple val(samplename), path("${samplename}.gvcf.gz") ,path("${samplename}.gvcf.gz.tbi")
     output: 
@@ -458,7 +439,6 @@ process snpeff {
         Evaluation table containing a collection of summary statistics
     */
     label 'process_mid'
-    publishDir("${outdir}/QC/snpeff", mode: 'copy')
 
     input:  
         tuple val(samplename), path("${samplename}.gvcf.gz"), path("${samplename}.gvcf.gz.tbi")
@@ -495,7 +475,6 @@ process somalier_extract {
         Exracted sites in (binary) somalier format
     */
     label 'process_low'
-    publishDir("${outdir}/QC/somalier", mode: 'copy')
 
     input:
         tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai")
@@ -536,7 +515,6 @@ process somalier_analysis_human {
     */
     label 'process_low'
 
-    publishDir("${outdir}/QC/somalier", mode: 'copy')
 
     input:
         path(somalierin)
@@ -601,8 +579,6 @@ process somalier_analysis_mouse {
     */
     label 'process_low'
 
-    publishDir("${outdir}/QC/somalier", mode: 'copy')
-
     input:
         path(somalierin)
     
@@ -653,8 +629,6 @@ process multiqc {
     @Output:
         Interactive MulitQC report and a QC metadata table
     """
-
-    publishDir("${outdir}/QC/multiqc", mode: 'copy')
     
     input:  
         path(allqcin)
diff --git a/modules/local/structural_variant.nf b/modules/local/structural_variant.nf
index 6182624..a6f58f4 100644
--- a/modules/local/structural_variant.nf
+++ b/modules/local/structural_variant.nf
@@ -8,8 +8,6 @@ DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS)
 process svaba_somatic {
     label 'process_highcpu'
 
-    publishDir(path: "${outdir}/SV/svaba", mode: 'copy') 
-
     input:
         tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai)
     
@@ -60,7 +58,6 @@ process svaba_somatic {
 process manta_somatic {
 
     label 'process_highcpu'
-    publishDir(path: "${outdir}/SV/manta", mode: 'copy') 
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai),val(normalname), path(normal), path(normalbai)
@@ -107,7 +104,6 @@ process annotsv_tn {
      //Requires bedtools,bcftools
 
     module = ['annotsv/3.3.1']
-    publishDir(path: "${outdir}/SV/annotated", mode: 'copy') 
 
     input:
         tuple val(tumorname), path(somaticvcf), val(sv)
@@ -141,7 +137,6 @@ process annotsv_tn {
 
 process manta_tonly {
     label 'process_highcpu'
-    publishDir(path: "${outdir}/SV/manta_tonly", mode: 'copy') 
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai)
@@ -184,7 +179,6 @@ process manta_tonly {
 
 process svaba_tonly {
     label 'process_highcpu'
-    publishDir(path: "${outdir}/SV/svaba_tonly", mode: 'copy') 
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai)
@@ -250,7 +244,6 @@ process gunzip {
 
 process survivor_sv {
     module = ['survivor']
-    publishDir(path: "${outdir}/SV/survivor", mode: 'copy') 
 
     input:
         tuple val(tumorname), 
diff --git a/modules/local/trim_align.nf b/modules/local/trim_align.nf
index 19dfa81..4fa34db 100644
--- a/modules/local/trim_align.nf
+++ b/modules/local/trim_align.nf
@@ -5,7 +5,6 @@ KNOWNRECAL = params.genomes[params.genome].KNOWNRECAL
 process fastp {
     label 'process_mid'
     tag { name }
-    publishDir(path: "${outdir}/QC/fastp", mode: 'copy', pattern: '{*fastp.json,*fastp.html}') 
 
     input:
     tuple val(samplename), path(fqs)
@@ -135,8 +134,7 @@ process applybqsr {
     Base quality recalibration for all samples to 
     */   
     label 'process_low'
-    publishDir(path: "${outdir}/bams/BQSR", mode: 'copy') 
-
+    
     input:
         tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path("${samplename}.recal_data.grp")
 
diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf
index 9d7892e..f9fdff2 100644
--- a/modules/local/variant_calling.nf
+++ b/modules/local/variant_calling.nf
@@ -115,8 +115,6 @@ process pileup_paired_n {
 process contamination_paired {
     label 'process_highmem'
 
-    publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy')
-
     input:
         tuple val(tumorname),
         path(tumor_pileups),
@@ -170,8 +168,6 @@ process contamination_paired {
 process learnreadorientationmodel {
     label 'process_highmem'
 
-    publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy')
-
     input:
         tuple val(sample), path(f1r2)
       
@@ -197,8 +193,6 @@ process learnreadorientationmodel {
 process mergemut2stats {
     label 'process_low'
 
-    publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy')
-
     input:
         tuple val(sample), path(stats)
       
@@ -225,8 +219,6 @@ process mergemut2stats {
 process mutect2filter {
     label 'process_mid'
         
-    publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy')
-
     input:
         tuple val(tumor), val(normal),path(mutvcfs), path(stats), path(obs), 
         path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination)
@@ -556,7 +548,6 @@ process muse_tn {
 
 process combineVariants {
     label 'process_highmem'
-    publishDir(path: "${outdir}/vcfs/", mode: 'copy')
 
     input:
         tuple val(sample), path(inputvcf), val(vc)
@@ -606,7 +597,6 @@ process combineVariants {
 
 process combineVariants_alternative {
     label 'process_highmem'
-    publishDir(path: "${outdir}/vcfs/", mode: 'copy')
 
     input:
         tuple val(sample), path(vcfs), path(vcfsindex), val(vc)
@@ -681,7 +671,6 @@ process bcftools_index_octopus {
 process combineVariants_strelka {
     //Concat all somatic snvs/indels across all files, strelka separates snv/indels
     label 'process_mid'
-    publishDir(path: "${outdir}/vcfs/strelka", mode: 'copy')
 
     input:
         tuple val(sample), 
@@ -727,7 +716,6 @@ process combineVariants_strelka {
 
 process somaticcombine {
     label 'process_mid'
-    publishDir(path: "${outdir}/vcfs/combined", mode: 'copy')
 
     input: 
         tuple val(tumorsample), val(normal),
@@ -766,7 +754,6 @@ process somaticcombine {
 
 
 process annotvep_tn {    
-    publishDir(path: "${outdir}/mafs/", mode: 'copy')
 
     input:
         tuple val(tumorsample), val(normalsample), 
diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf
index a252597..16e6e51 100644
--- a/modules/local/variant_calling_tonly.nf
+++ b/modules/local/variant_calling_tonly.nf
@@ -44,7 +44,6 @@ process pileup_paired_tonly {
 
 process contamination_tumoronly {
     label 'process_highmem'
-    publishDir(path: "${outdir}/vcfs/mutect2/", mode: 'copy')
 
     input:
         tuple val(tumorname),
@@ -83,7 +82,6 @@ process contamination_tumoronly {
 
 process learnreadorientationmodel_tonly {
     label 'process_highmem'
-    publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy')
 
     input:
         tuple val(sample), path(f1r2)
@@ -112,7 +110,6 @@ process learnreadorientationmodel_tonly {
 
 process mergemut2stats_tonly {
     label 'process_low'
-    publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy')
 
     input:
         tuple val(sample), path(stats)
@@ -178,7 +175,6 @@ process mutect2_t_tonly {
 
 process mutect2filter_tonly {
     label 'process_mid'
-    publishDir(path: "${outdir}/vcfs/mutect2_tonly", mode: 'copy')
 
     input:
         tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups),path(tumorcontamination)
@@ -441,7 +437,6 @@ process annotvep_tonly {
 
 process combinemafs_tonly {
     label 'process_low'
-    publishDir(path: "${outdir}/mafs/tumor_only", mode: 'copy')
 
     input: 
         path(allmafs)

From 9091ad8000b44bd925eb82c4b54a556917626cf7 Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Wed, 13 Dec 2023 11:48:51 -0500
Subject: [PATCH 16/58] fix: varscan fixes

---
 conf/modules.config                    |  1 +
 modules/local/variant_calling_tonly.nf |  2 +-
 nextflow.config                        |  2 +-
 subworkflows/local/workflows.nf        |  4 +--
 subworkflows/local/workflows_tonly.nf  | 35 ++++++++++++++------------
 5 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 3c8a778..5ac0c2d 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -245,6 +245,7 @@ process {
             path: { "${params.outdir}/mafs" },
             mode: 'copy'
         ]
+            errorStrategy='ignore'
     }
 
  withName: 'combinemafs_tonly' {
diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf
index 16e6e51..1c58bc6 100644
--- a/modules/local/variant_calling_tonly.nf
+++ b/modules/local/variant_calling_tonly.nf
@@ -239,7 +239,7 @@ process varscan_tonly {
 
     '''
     varscan_opts="--strand-filter 0 --min-var-freq 0.01 --output-vcf 1 --variants 1"
-    pileup_cmd="samtools mpileup -d 100000 -q 15 -Q 15 -f !{GENOMEREF} !{tumor}"
+    pileup_cmd="samtools mpileup -d 100000 -q 15 -Q 15 -f !{GENOMEREF} -l !{bed} !{tumor}"
     varscan_cmd="varscan mpileup2cns <($pileup_cmd) $varscan_opts"
 
     eval "$varscan_cmd > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf"
diff --git a/nextflow.config b/nextflow.config
index c3b84d4..1718100 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -28,7 +28,7 @@ params {
     freec_significance = "${projectDir}/bin/assess_significance.R"
     freec_plot = "${projectDir}/bin/makeGraph.R"
     lofreq_convert = "${projectDir}/bin/add_gt_lofreq.sh"
-    split_regions = "36" //Number of regions to split by 
+    split_regions = "24" //Number of regions to split by 
 
     vep_cache = "/fdb/VEP/102/cache"
 
diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf
index d9dab2d..875c317 100644
--- a/subworkflows/local/workflows.nf
+++ b/subworkflows/local/workflows.nf
@@ -546,8 +546,8 @@ workflow INPUT_BAM {
             baminputonly=Channel.fromPath(params.bam_input)
            .map{it-> tuple(it.simpleName,it,file("${it}.bai"))}
         }
-        if (bamcheck2.size()>0){
-            bai=Channel.from(bamcheck2).map{it -> tuple(it.simpleName,it)}.view()
+              else if (bamcheck2.size()>0){
+            bai=Channel.from(bamcheck2).map{it -> tuple(it.simpleName,it)}
             baminputonly=Channel.fromPath(params.bam_input)
            .map{it-> tuple(it.simpleName,it)}
            .join(bai)
diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf
index ca5eb5f..04c0aaa 100644
--- a/subworkflows/local/workflows_tonly.nf
+++ b/subworkflows/local/workflows_tonly.nf
@@ -137,8 +137,8 @@ workflow VC_TONLY {
     bambyinterval=bamwithsample.combine(splitout.flatten())
     pileup_paired_tonly(bambyinterval)
     pileup_paired_tout=pileup_paired_tonly.out.groupTuple()
-    .map{samplename,pileups-> tuple( samplename,
-    pileups.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tumor.pileup.table/)[0][1].toInteger() } ,
+        .map{samplename,pileups-> tuple( samplename,
+        pileups.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tumor.pileup.table/)[0][1].toInteger() } ,
     )}
 
     mutect2_t_tonly(bambyinterval)    
@@ -162,9 +162,9 @@ workflow VC_TONLY {
 
     
     mut2tonly_filter=mut2tonlyout.allmut2tonly
-    .join(mergemut2stats_tonly.out)
-    .join(learnreadorientationmodel_tonly.out)
-    .join(contamination_tumoronly.out)
+        | join(mergemut2stats_tonly.out)
+        | join(learnreadorientationmodel_tonly.out)
+        | join(contamination_tumoronly.out) 
 
     mutect2_tonly_in=mutect2filter_tonly(mut2tonly_filter) 
         | join(sample_sheet)
@@ -173,15 +173,17 @@ workflow VC_TONLY {
 
 
     //VarDict
-    vardict_in_tonly=vardict_tonly(bambyinterval) | groupTuple()| map{tumor,vcf -> tuple(tumor,vcf,"vardict_tonly")} 
+    vardict_in_tonly=vardict_tonly(bambyinterval) | groupTuple()
+        | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.vardict.vcf/)[0][1].toInteger()},"vardict_tonly")}
         | combineVariants_vardict_tonly
         | join(sample_sheet)
         | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"vardict_tonly",normvcf,normindex)}
     annotvep_tonly_vardict(vardict_in_tonly)
 
     //VarScan_tonly
-    varscan_in_tonly=bambyinterval.join(contamination_tumoronly.out)
-        | varscan_tonly | groupTuple() | map{tumor,vcf-> tuple(tumor,vcf,"varscan")} 
+    varscan_in_tonly=bambyinterval.combine(contamination_tumoronly.out,by: 0)
+        | varscan_tonly | groupTuple() 
+        | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf/)[0][1].toInteger()},"varscan_tonly")
         | combineVariants_varscan_tonly 
         | join(sample_sheet)
         | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"varscan_tonly",normvcf,normindex)} 
@@ -197,8 +199,9 @@ workflow VC_TONLY {
     annotvep_tonly_octopus(octopus_in_tonly)
 
 
-    mutect2_tonly_in|concat(octopus_in_tonly)
-        | concat(vardict_in_tonly)|concat(varscan_in_tonly)
+    mutect2_tonly_in | concat(octopus_in_tonly)
+        | concat(vardict_in_tonly) | concat(varscan_in_tonly)
+        | groupTuple()
         | somaticcombine_tonly 
         | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} 
         | annotvep_tonly_combined
@@ -324,20 +327,20 @@ workflow INPUT_TONLY_BAM {
     main:
     //Either BAM Input or File sheet input 
     if(params.bam_input){
-        bambai = params.bam_input +".bai"
+        bambai = params.bam_input + ".bai"
         baionly = bambai.replace(".bam", "")
         bamcheck1 = file(bambai)
         bamcheck2 = file(baionly)
 
         if (bamcheck1.size()>0){
             baminputonly=Channel.fromPath(params.bam_input)
-           .map{it-> tuple(it.simpleName,it,file("${it}.bai"))}
+                | map{it-> tuple(it.simpleName,it,file("${it}.bai"))} 
         }
-        if (bamcheck2.size()>0){
-            bai=Channel.from(bamcheck2).map{it -> tuple(it.simpleName,it)}.view()
+        else if (bamcheck2.size()>0){
+            bai=Channel.from(bamcheck2).map{it -> tuple(it.simpleName,it)}
             baminputonly=Channel.fromPath(params.bam_input)
-           .map{it-> tuple(it.simpleName,it)}
-           .join(bai)
+            | map{it-> tuple(it.simpleName,it)}
+            | join(bai)
         }
 
         sample_sheet=baminputonly.map{samplename,bam,bai -> tuple (

From d84dc3ec505054415420c58252d219a602766e5d Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Wed, 13 Dec 2023 11:57:57 -0500
Subject: [PATCH 17/58] feat: add setup

---
 modules/local/variant_calling.nf | 1 -
 setup.py                         | 4 ++++
 2 files changed, 4 insertions(+), 1 deletion(-)
 create mode 100644 setup.py

diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf
index f9fdff2..188a9e9 100644
--- a/modules/local/variant_calling.nf
+++ b/modules/local/variant_calling.nf
@@ -754,7 +754,6 @@ process somaticcombine {
 
 
 process annotvep_tn {    
-
     input:
         tuple val(tumorsample), val(normalsample), 
         val(vc), path(tumorvcf), path(vcfindex) 
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..4a15e68
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,4 @@
+import setuptools
+
+if __name__ == "__main__":
+    setuptools.setup()
\ No newline at end of file

From daf943a3eaf86265016ad8d4fcf4b36c584f7c3d Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Wed, 13 Dec 2023 12:14:10 -0500
Subject: [PATCH 18/58] fix: add citation

---
 CITATION.cff | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 CITATION.cff

diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 0000000..d1bc133
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,15 @@
+cff-version: 1.2.0
+message: "Please cite LOGAN as below." # TODO set up Zenodo to archive your tool and assign a DOI. Or if TOOL_NAME gets published in a journal, include the citation here.
+authors: # TODO: author names should match those in pyproject.toml
+  - family-names: LASTNAME1
+    given-names: FIRSTNAME1
+  - family-names: Sovacool
+    given-names: Kelly
+    orcid: https://orcid.org/0000-0003-3283-829X
+  - family-names: Koparde
+    given-names: Vishal
+    orcid: https://orcid.org/0000-0001-8978-8495
+title: "LOGAN: CCBR whole genome sequencing pipeline" # TODO: citation title should match pyproject.toml
+url: https://ccbr.github.io/LOGAN/
+repository-code: https://github.com/CCBR/LOGAN
+license: MIT
\ No newline at end of file

From c1a89590464c9e2e1135cbd71b8ee29ee279f381 Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Wed, 13 Dec 2023 12:14:23 -0500
Subject: [PATCH 19/58] fix: missing closing brace in varscan_tonly map closure

---
 subworkflows/local/workflows_tonly.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf
index 04c0aaa..1d9c2de 100644
--- a/subworkflows/local/workflows_tonly.nf
+++ b/subworkflows/local/workflows_tonly.nf
@@ -183,7 +183,7 @@ workflow VC_TONLY {
     //VarScan_tonly
     varscan_in_tonly=bambyinterval.combine(contamination_tumoronly.out,by: 0)
         | varscan_tonly | groupTuple() 
-        | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf/)[0][1].toInteger()},"varscan_tonly")
+        | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf/)[0][1].toInteger()},"varscan_tonly")}
         | combineVariants_varscan_tonly 
         | join(sample_sheet)
         | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"varscan_tonly",normvcf,normindex)} 

From b694a369b7e696c671890483cfc7f84b619060f5 Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Wed, 13 Dec 2023 16:06:55 -0500
Subject: [PATCH 20/58] fix: varscan alleles

---
 modules/local/variant_calling_tonly.nf | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf
index 1c58bc6..6761229 100644
--- a/modules/local/variant_calling_tonly.nf
+++ b/modules/local/variant_calling_tonly.nf
@@ -242,7 +242,10 @@ process varscan_tonly {
     pileup_cmd="samtools mpileup -d 100000 -q 15 -Q 15 -f !{GENOMEREF} -l !{bed} !{tumor}"
     varscan_cmd="varscan mpileup2cns <($pileup_cmd) $varscan_opts"
 
-    eval "$varscan_cmd > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf"
+    eval "$varscan_cmd > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf_temp"
+
+    awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf_temp \
+        | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf
 
     printf "TUMOR\t!{tumorname}\n" > sampname 
     

From 5ebee2c3179bc3e37be9ae85236dd3875ceaa582 Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Wed, 13 Dec 2023 16:56:57 -0500
Subject: [PATCH 21/58] refactor: citation

---
 CITATION.cff | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CITATION.cff b/CITATION.cff
index d1bc133..3562005 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -1,8 +1,8 @@
 cff-version: 1.2.0
 message: "Please cite LOGAN as below." # TODO set up Zenodo to archive your tool and assign a DOI. Or if TOOL_NAME gets published in a journal, include the citation here.
 authors: # TODO: author names should match those in pyproject.toml
-  - family-names: LASTNAME1
-    given-names: FIRSTNAME1
+  - family-names: Nousome
+    given-names: Darryl
   - family-names: Sovacool
     given-names: Kelly
     orcid: https://orcid.org/0000-0003-3283-829X

From 96b2415aaeb78d7f04c4d82f70e35b7c9546236f Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Wed, 13 Dec 2023 17:06:54 -0500
Subject: [PATCH 22/58] docs: start update for MKdocs

---
 mkdocs.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mkdocs.yml b/mkdocs.yml
index e6e1e40..91186b8 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -1,4 +1,4 @@
-site_name: CCBR wgs-seek
+site_name: CCBR LOGAN
 site_description: >-
   CCBR Whole Genome Sequencing Pipeline 
 nav:

From 3b9c4e4e698940e716f374af5eb88717c5728192 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Thu, 14 Dec 2023 11:39:52 -0500
Subject: [PATCH 23/58] fix: IUPAC codes

---
 modules/local/variant_calling.nf       | 12 ++++++------
 modules/local/variant_calling_tonly.nf |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf
index 188a9e9..6508d20 100644
--- a/modules/local/variant_calling.nf
+++ b/modules/local/variant_calling.nf
@@ -250,7 +250,7 @@ process mutect2filter {
     
     bcftools sort ${tumor}_vs_${normal}.mut2.final.vcf.gz |\
     bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
-        awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
+        awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\
         sed '/^\$/d' | bcftools view - -Oz -o ${tumor}_vs_${normal}.mut2.norm.vcf.gz
     bcftools index -t ${tumor}_vs_${normal}.mut2.norm.vcf.gz
     """
@@ -392,9 +392,9 @@ process varscan_tn {
     varscan_cmd="varscan somatic <($dual_pileup) !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf $varscan_opts --mpileup 1"
     eval "$varscan_cmd"
 
-    awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel \
+    awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel \
         | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.indel_temp.vcf.gz
-    awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp \
+    awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp \
         | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.snp_temp.vcf.gz
 
     gatk SortVcf -I !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.snp_temp.vcf.gz \
@@ -569,7 +569,7 @@ process combineVariants {
         -SD $GENOMEDICT \
         -I $vcfin
     bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
-        awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
+        awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\
         sed '/^\$/d' > ${sample}.${vc}.temp.vcf
 
     bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz
@@ -617,7 +617,7 @@ process combineVariants_alternative {
     bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf
     bcftools sort ${sample}.${vc}.temp.vcf -Oz -o ${sample}.${vc}.marked.vcf.gz
     bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
-        awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
+        awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\
         sed '/^\$/d' > ${sample}.${vc}.temp.vcf
 
     bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz
@@ -692,7 +692,7 @@ process combineVariants_strelka {
     """
     bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz -a 
     bcftools norm ${sample}.temp.strelka.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
-        awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
+        awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\
         sed '/^\$/d' > ${sample}.temp1.strelka.vcf.gz
 
     bcftools sort ${sample}.temp1.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz 
diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf
index 6761229..4c4d0a7 100644
--- a/modules/local/variant_calling_tonly.nf
+++ b/modules/local/variant_calling_tonly.nf
@@ -208,7 +208,7 @@ process mutect2filter_tonly {
 
     bcftools sort ${sample}.tonly.mut2.final.vcf.gz |\
     bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
-        awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\t"; print}}' |\
+        awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\
         sed '/^\$/d' |\
     bcftools view - -Oz -o  ${sample}.tonly.mut2.norm.vcf.gz
     bcftools index -t ${sample}.tonly.mut2.norm.vcf.gz
@@ -244,7 +244,7 @@ process varscan_tonly {
 
     eval "$varscan_cmd > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf_temp"
 
-    awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf_temp \
+    awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf_temp \
         | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf
 
     printf "TUMOR\t!{tumorname}\n" > sampname 

From 6a540bf1b220d615e91b6481d0e9e141ab44259e Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Thu, 14 Dec 2023 20:52:54 -0500
Subject: [PATCH 24/58] fix: cli settings

---
 main.nf                                | 76 ++++++++++++++------------
 modules/local/variant_calling.nf       |  2 +-
 modules/local/variant_calling_tonly.nf |  6 +-
 src/__main__.py                        |  6 +-
 4 files changed, 50 insertions(+), 40 deletions(-)

diff --git a/main.nf b/main.nf
index e1e7422..2988cc5 100644
--- a/main.nf
+++ b/main.nf
@@ -4,53 +4,60 @@ nextflow.enable.dsl=2
 date = new Date().format( 'yyyyMMdd' )
 
 
-//SUB WORKFLOWS to SPLIT
-PIPE_ALIGN=params.PIPE_ALIGN
+log.info """\
+         L O G A E E K   P I P E L I N E    
+         =============================
+         genome: ${params.genome}
+         outdir: ${params.outdir}
+         Samplesheet: ${params.sample_sheet}
+         Samples: ${params.fastq_input} ${params.file_input} ${params.bam_input}
+         """
+         .stripIndent()
 
-PIPE_VC=params.PIPE_VC
-PIPE_SV=params.PIPE_SV
-PIPE_CNV=params.PIPE_CNV
 
-PIPE_QC_GL=params.PIPE_QC_GL
-PIPE_QC_NOGL=params.PIPE_QC_NOGL
 
-PIPE_GL=params.PIPE_GL
+include {INPUT; ALIGN; GL;
+    VC; INPUT_BAM; SV; CNVmouse; CNVhuman;
+    QC_GL; QC_NOGL} from "./subworkflows/local/workflows.nf"
 
-PIPE_TONLY_ALIGN=params.PIPE_TONLY_ALIGN
-PIPE_TONLY_VC=params.PIPE_TONLY_VC
-PIPE_TONLY_SV=params.PIPE_TONLY_SV
-PIPE_TONLY_CNV=params.PIPE_TONLY_CNV
-PIPE_TONLY_QC=params.PIPE_TONLY_QC
+include {INPUT_TONLY; INPUT_TONLY_BAM;
+    ALIGN_TONLY;
+    VC_TONLY; SV_TONLY; CNVhuman_tonly; CNVmouse_tonly; QC_TONLY } from "./subworkflows/local/workflows_tonly.nf"
 
 
-PIPE_BAMVC=params.PIPE_BAMVC
-PIPE_BAMSV=params.PIPE_BAMCNV
-PIPE_BAMCNV=params.PIPE_BAMCNV
 
-PIPE_TONLY_BAMVC=params.PIPE_TONLY_BAMVC
-PIPE_TONLY_BAMSV=params.PIPE_TONLY_BAMSV
-PIPE_TONLY_BAMCNV=params.PIPE_TONLY_BAMCNV
 
+//SUB WORKFLOWS to SPLIT
+PIPE_ALIGN=params.align
+
+PIPE_VC=params.vc
+PIPE_SV=params.sv
+PIPE_CNV=params.cnv
+
+PIPE_QC_GL=params.qc_gl
+PIPE_QC_NOGL=params.qc_nogl
+
+PIPE_GL=params.gl
+
+PIPE_TONLY_ALIGN=params.align_tumoronly
+PIPE_TONLY_VC=params.vc_tumoronly
+PIPE_TONLY_SV=params.sv_tumoronly
+PIPE_TONLY_CNV=params.cnv_tumoronly
+PIPE_TONLY_QC=params.qc_tumoronly
+
+
+PIPE_BAMVC=params.vc_bam
+PIPE_BAMSV=params.sv_bam
+PIPE_BAMCNV=params.cnv_bam
+
+PIPE_TONLY_BAMVC=params.vc_bam_tumoronly
+PIPE_TONLY_BAMSV=params.sv_bam_tumoronly
+PIPE_TONLY_BAMCNV=params.cnv_bam_tumoronly
 
 
-include {INPUT; ALIGN; GL;
-    VC; INPUT_BAM; SV; CNVmouse; CNVhuman;
-    QC_GL; QC_NOGL} from "./subworkflows/local/workflows.nf"
 
-include {INPUT_TONLY; INPUT_TONLY_BAM;
-    ALIGN_TONLY;
-    VC_TONLY; SV_TONLY; CNVhuman_tonly; CNVmouse_tonly; QC_TONLY } from "./subworkflows/local/workflows_tonly.nf"
 
 
-log.info """\
-         W G S S E E K   P I P E L I N E    
-         =============================
-         genome: ${params.genome}
-         outdir: ${params.outdir}
-         Samplesheet: ${params.sample_sheet}
-         Samples: ${params.fastq_input} ${params.file_input} ${params.bam_input}
-         """
-         .stripIndent()
 
 workflow.onComplete {
     if (!workflow.stubRun && !workflow.commandLine.contains('-preview')) {
@@ -61,7 +68,6 @@ workflow.onComplete {
     }
 }
 
-//Final Workflow
 //Final Workflow
 workflow {
 
diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf
index 6508d20..e23cd0e 100644
--- a/modules/local/variant_calling.nf
+++ b/modules/local/variant_calling.nf
@@ -399,7 +399,7 @@ process varscan_tn {
 
     gatk SortVcf -I !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.snp_temp.vcf.gz \
     -I !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.indel_temp.vcf.gz \
-    -R  !{GENOMEREF} -SD !{GENOMEDICT} \
+    -R !{GENOMEREF} -SD !{GENOMEDICT} \
     -O !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf
 
     printf "NORMAL\t!{normalname}\nTUMOR\t!{tumorname}\n" > sampname 
diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf
index 4c4d0a7..9f8bf93 100644
--- a/modules/local/variant_calling_tonly.nf
+++ b/modules/local/variant_calling_tonly.nf
@@ -1,9 +1,9 @@
 GENOMEREF=file(params.genomes[params.genome].genome)
 GENOMEFAI=file(params.genomes[params.genome].genomefai)
 GENOMEDICT=file(params.genomes[params.genome].genomedict)
-KGPGERMLINE=params.genomes[params.genome].kgp //1000G_phase1.snps.high_confidence.hg38.vcf.gz"
-DBSNP=file(params.genomes[params.genome].dbsnp) //dbsnp_138.hg38.vcf.gz"
-GNOMADGERMLINE=params.genomes[params.genome].gnomad //somatic-hg38-af-only-gnomad.hg38.vcf.gz
+KGPGERMLINE=params.genomes[params.genome].kgp 
+DBSNP=file(params.genomes[params.genome].dbsnp) 
+GNOMADGERMLINE=params.genomes[params.genome].gnomad 
 PON=file(params.genomes[params.genome].pon) 
 VEPCACHEDIR=file(params.genomes[params.genome].vepcache)
 VEPSPECIES=params.genomes[params.genome].vepspecies
diff --git a/src/__main__.py b/src/__main__.py
index 0a70824..1214479 100644
--- a/src/__main__.py
+++ b/src/__main__.py
@@ -31,7 +31,11 @@ def common_options(func):
     cls=OrderedCommands, context_settings=dict(help_option_names=["-h", "--help"])
 )
 @click.version_option(get_version(), "-v", "--version", is_flag=True)
-@click.option("--citation", is_flag=True, callback=print_citation, expose_value=False, help="Print the citation in bibtex format and exit.")
+#@click.option("--citation", 
+#              is_flag=True, 
+#              callback=print_citation, 
+#              expose_value=False, 
+#              help="Print the citation in bibtex format and exit.")
 def cli():
     """whoLe genOme-sequencinG Analysis pipeliNe
 

From 3301ad0ec837857452a92bd2564d84ecb88ea0e4 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Thu, 14 Dec 2023 22:00:36 -0500
Subject: [PATCH 25/58] fix: update for cli interface

---
 main.nf                               | 219 +++++++++++---------------
 nextflow.config                       |  32 ++--
 subworkflows/local/workflows_tonly.nf |   4 +-
 3 files changed, 106 insertions(+), 149 deletions(-)

diff --git a/main.nf b/main.nf
index 2988cc5..cb2ca03 100644
--- a/main.nf
+++ b/main.nf
@@ -5,11 +5,11 @@ date = new Date().format( 'yyyyMMdd' )
 
 
 log.info """\
-         L O G A E E K   P I P E L I N E    
+         L O G A N     P I P E L I N E    
          =============================
          genome: ${params.genome}
          outdir: ${params.outdir}
-         Samplesheet: ${params.sample_sheet}
+         Sample Sheet: ${params.sample_sheet}
          Samples: ${params.fastq_input} ${params.file_input} ${params.bam_input}
          """
          .stripIndent()
@@ -28,37 +28,6 @@ include {INPUT_TONLY; INPUT_TONLY_BAM;
 
 
 //SUB WORKFLOWS to SPLIT
-PIPE_ALIGN=params.align
-
-PIPE_VC=params.vc
-PIPE_SV=params.sv
-PIPE_CNV=params.cnv
-
-PIPE_QC_GL=params.qc_gl
-PIPE_QC_NOGL=params.qc_nogl
-
-PIPE_GL=params.gl
-
-PIPE_TONLY_ALIGN=params.align_tumoronly
-PIPE_TONLY_VC=params.vc_tumoronly
-PIPE_TONLY_SV=params.sv_tumoronly
-PIPE_TONLY_CNV=params.cnv_tumoronly
-PIPE_TONLY_QC=params.qc_tumoronly
-
-
-PIPE_BAMVC=params.vc_bam
-PIPE_BAMSV=params.sv_bam
-PIPE_BAMCNV=params.cnv_bam
-
-PIPE_TONLY_BAMVC=params.vc_bam_tumoronly
-PIPE_TONLY_BAMSV=params.sv_bam_tumoronly
-PIPE_TONLY_BAMCNV=params.cnv_bam_tumoronly
-
-
-
-
-
-
 workflow.onComplete {
     if (!workflow.stubRun && !workflow.commandLine.contains('-preview')) {
         def message = Utils.spooker(workflow)
@@ -70,123 +39,111 @@ workflow.onComplete {
 
 //Final Workflow
 workflow {
-
-    if (PIPE_ALIGN){
+    //Inputs
+    if (params.fastq_input && params.sample_sheet){
         INPUT()
         ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet)
-    } 
     //Germline
-    if (PIPE_GL){
-        INPUT()
-        ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet)
-        GL(ALIGN.out.bambyinterval)
-    }
-
-    //Tumor-Normal Pipelines
-    if (PIPE_VC){
-        INPUT()
-        ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet)
-        VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet)
-    }
-    if (PIPE_QC_GL){
-        INPUT()
-        ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet)
-        GL(ALIGN.out.bambyinterval)
-        QC_GL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout,GL.out.glnexusout,GL.out.bcfout)
-    }  
-    if (PIPE_QC_NOGL){
-        INPUT()
-        ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet)
-        QC_NOGL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout)
-    }  
-    if (PIPE_SV){
-        INPUT()
-        ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet)
-        SV(ALIGN.out.bamwithsample)
-    }  
-    if (PIPE_CNV){
-        INPUT()
-        ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet)
-        if (params.genome == "mm10"){
-            CNVmouse(ALIGN.out.bamwithsample)
-        } else if (params.genome== "hg38"){
-            VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet)
-            CNVhuman(ALIGN.out.bamwithsample,VC.out.somaticcall_input)
-
+        if (params.gl){
+           GL(ALIGN.out.bambyinterval)
         }
+        //Tumor-Normal VC, SV, CNV
+        if (params.vc){
+            VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet)
+        }   
+        if (params.sv){
+            SV(ALIGN.out.bamwithsample)
+        }  
+        if (params.cnv){
+            if (params.genome == "mm10"){
+                CNVmouse(ALIGN.out.bamwithsample)
+            } else if (params.genome== "hg38"){
+                if (!params.vc){
+                    VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet)
+                    CNVhuman(ALIGN.out.bamwithsample,VC.out.somaticcall_input)
+                } else {
+                    CNVhuman(ALIGN.out.bamwithsample,VC.out.somaticcall_input)
+                }
+            }
     }  
-    if (PIPE_BAMVC){
-        INPUT_BAM()
-        VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet)
-    }  
-    if (PIPE_BAMSV){
-        INPUT_BAM()
-        SV(INPUT_BAM.out.bamwithsample)
-    }  
-    if (PIPE_BAMCNV){
+        if (params.qc && params.gl){
+            QC_GL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout,GL.out.glnexusout,GL.out.bcfout)
+        }  else if (params.qc){
+            QC_NOGL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout)
+        }  
+
+    }
+    
+    //TUMOR-NOMRAL BAM INPUT
+    if (params.bam_input && params.sample_sheet){
         INPUT_BAM()
-        if (params.genome == "mm10"){
-            CNVmouse(INPUT_BAM.out.bamwithsample)
-        } else if (params.genome== "hg38"){
+        if (params.vc){
             VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet)
-            CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input)
-
+        }  
+        if (params.sv){
+            SV(INPUT_BAM.out.bamwithsample)
+        }  
+        if (params.cnv){
+            if (params.genome == "mm10"){
+                CNVmouse(INPUT_BAM.out.bamwithsample)
+            } else if (params.genome== "hg38"){
+                if (!params.vc){
+                    VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet)
+                    CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input)
+                }else { 
+                    CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input)
+                }
+            }
         }
     }  
-
-
+    
     ///Tumor Only Pipelines
-    if (PIPE_TONLY_ALIGN){
+    if (params.fastq_input && !params.sample_sheet){
         INPUT_TONLY()
         ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet)
-    }
-    if (PIPE_TONLY_VC){
-        INPUT_TONLY()
-        ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet)
-        VC_TONLY(ALIGN_TONLY.out.bamwithsample,ALIGN_TONLY.out.splitout,ALIGN_TONLY.out.sample_sheet)
-    }    
-    if (PIPE_TONLY_SV){
-        INPUT_TONLY()
-        ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet)
-        SV_TONLY(ALIGN_TONLY.out.bamwithsample)
-    }   
-    if (PIPE_TONLY_CNV){
-        INPUT_TONLY()
-        ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet)
-        if (params.genome == "mm10"){
-            CNVmouse_tonly(ALIGN_TONLY.out.bamwithsample)
-        } else if (params.genome== "hg38"){
+        if (params.vc){
             VC_TONLY(ALIGN_TONLY.out.bamwithsample,ALIGN_TONLY.out.splitout,ALIGN_TONLY.out.sample_sheet)
-            CNVhuman_tonly(ALIGN_TONLY.out.bamwithsample,VC_TONLY.out.somaticcall_input)
-
         }
-    }  
-
-    if (PIPE_TONLY_QC){
-        INPUT_TONLY()
-        ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet)
-        QC_TONLY(ALIGN_TONLY.out.fastqin,ALIGN_TONLY.out.fastpout,ALIGN_TONLY.out.bqsrout)
+        if (params.sv){
+            SV_TONLY(ALIGN_TONLY.out.bamwithsample)
+        }
+        if (params.cnv){
+            if (params.genome == "mm10"){
+                CNVmouse_tonly(ALIGN_TONLY.out.bamwithsample)
+            } else if (params.genome== "hg38"){
+                if (!params.vc){
+                    VC_TONLY(ALIGN_TONLY.out.bamwithsample,ALIGN_TONLY.out.splitout,ALIGN_TONLY.out.sample_sheet)
+                    CNVhuman_tonly(ALIGN_TONLY.out.bamwithsample,VC_TONLY.out.somaticcall_input)
+                } else{
+                    CNVhuman_tonly(ALIGN_TONLY.out.bamwithsample,VC_TONLY.out.somaticcall_input)
+                }
+            }
+        }
+        if (params.qc){
+                QC_TONLY(ALIGN_TONLY.out.fastqin,ALIGN_TONLY.out.fastpout,ALIGN_TONLY.out.bqsrout)
+        }
+    }
 
-    }  
     //Variant Calling from BAM-Tumor Only Mode
-    if (PIPE_TONLY_BAMVC){
+    if (params.bam_input && !params.sample_sheet){
         INPUT_TONLY_BAM()
-        VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet)
-    }
-    if (PIPE_TONLY_BAMSV){
-        INPUT_TONLY_BAM()
-        SV_TONLY(INPUT_TONLY_BAM.out.bamwithsample)
-    }  
-    if (PIPE_TONLY_BAMCNV){
-        INPUT_TONLY_BAM()
-        if (params.genome == "mm10"){
-            CNVmouse_tonly(INPUT_TONLY_BAM.out.bamwithsample)
-        }else if (params.genome== "hg38"){
+        if (params.vc){
             VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet)
-            CNVhuman_tonly(INPUT_TONLY_BAM.out.bamwithsample,VC_TONLY.out.somaticcall_input)
-
         }
-    }  
+        if (params.sv){
+            SV_TONLY(INPUT_TONLY_BAM.out.bamwithsample)
+        }  
+        if (params.cnv){
+            if (params.genome == "mm10"){
+                CNVmouse_tonly(INPUT_TONLY_BAM.out.bamwithsample)
+            } else if (params.genome== "hg38"){
+                if (!params.vc){ VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet) } //the params.vc branch above already invoked VC_TONLY; DSL2 allows a component to be invoked only once per workflow
+                CNVhuman_tonly(INPUT_TONLY_BAM.out.bamwithsample,VC_TONLY.out.somaticcall_input)
+            }
+        }  
+    
+    }
+
 }
 
     
diff --git a/nextflow.config b/nextflow.config
index 1718100..3bc3a3f 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -33,22 +33,22 @@ params {
     vep_cache = "/fdb/VEP/102/cache"
 
     //SUB WORKFLOWS to SPLIT
-    PIPE_ALIGN=null
-    PIPE_GL=null
-    PIPE_VC=null
-    PIPE_SV=null
-    PIPE_CNV=null
-    PIPE_QC=null
-    PIPE_QC_NOGL=null
-    PIPE_QC_GL=null
-    PIPE_BAMVC=null
-    PIPE_BAMCNV=null
-    PIPE_BAMSV=null
-
-    PIPE_TONLY_ALIGN=null
-    PIPE_TONLY_VC=null
-    PIPE_TONLY_SV=null
-    PIPE_TONLY_CNV=null
+    align=null
+    gl=null
+    vc=null
+    sv=null
+    cnv=null
+    qc=null
+    qc_nogl=null
+    qc_gl=null
+    vc_bam=null
+    cnv_bam=null
+    sv_bam=null
+
+    //align_=null
+    //vc=null
+    sv_tumoronly=null
+    cnv_tumoronly=null
 
     PIPE_BAMVC_TONLY=null
     PIPE_TONLY_BAMVC=null
diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf
index 1d9c2de..57307c4 100644
--- a/subworkflows/local/workflows_tonly.nf
+++ b/subworkflows/local/workflows_tonly.nf
@@ -89,8 +89,8 @@ workflow ALIGN_TONLY {
         sample_sheet
 
     main:
-    fastp(fastqinput)
-    splitinterval(intervalbedin)
+        fastp(fastqinput)
+        splitinterval(intervalbedin)
     
     bwamem2(fastp.out)
     //indelrealign(bwamem2.out)

From 32a23943cbf46e05c9d1a1533421e944e1204131 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Thu, 14 Dec 2023 23:03:01 -0500
Subject: [PATCH 26/58] fix: additional changes for cli

---
 conf/ci_stub.config |  1 -
 conf/modules.config |  1 -
 main.nf             | 13 ++++++-------
 nextflow.config     | 20 +-------------------
 4 files changed, 7 insertions(+), 28 deletions(-)

diff --git a/conf/ci_stub.config b/conf/ci_stub.config
index 808f53f..af76ab6 100644
--- a/conf/ci_stub.config
+++ b/conf/ci_stub.config
@@ -15,7 +15,6 @@ params {
 process {
     cpus = 1
     memory = '1.GB'
-
         singularity {
                 enabled = false
         }
diff --git a/conf/modules.config b/conf/modules.config
index 5ac0c2d..48cf213 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -84,7 +84,6 @@ process {
         ]
     }
 
-
     withName: 'fastqc' {
         publishDir = [
             path: { "${params.outdir}/QC/fastqc" },
diff --git a/main.nf b/main.nf
index cb2ca03..01bf4a5 100644
--- a/main.nf
+++ b/main.nf
@@ -16,8 +16,8 @@ log.info """\
 
 
 
-include {INPUT; ALIGN; GL;
-    VC; INPUT_BAM; SV; CNVmouse; CNVhuman;
+include {INPUT; INPUT_BAM; ALIGN; GL;
+    VC; SV; CNVmouse; CNVhuman;
     QC_GL; QC_NOGL} from "./subworkflows/local/workflows.nf"
 
 include {INPUT_TONLY; INPUT_TONLY_BAM;
@@ -26,7 +26,6 @@ include {INPUT_TONLY; INPUT_TONLY_BAM;
 
 
 
-
 //SUB WORKFLOWS to SPLIT
 workflow.onComplete {
     if (!workflow.stubRun && !workflow.commandLine.contains('-preview')) {
@@ -40,7 +39,7 @@ workflow.onComplete {
 //Final Workflow
 workflow {
     //Inputs
-    if (params.fastq_input && params.sample_sheet){
+    if ([params.fastq_input,params.file_input].any() && params.sample_sheet){
         INPUT()
         ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet)
     //Germline
@@ -75,7 +74,7 @@ workflow {
     }
     
     //TUMOR-NOMRAL BAM INPUT
-    if (params.bam_input && params.sample_sheet){
+    if ([params.bam_input,params.file_input].any() && params.sample_sheet){
         INPUT_BAM()
         if (params.vc){
             VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet)
@@ -98,7 +97,7 @@ workflow {
     }  
     
     ///Tumor Only Pipelines
-    if (params.fastq_input && !params.sample_sheet){
+    if ([params.fastq_input,params.file_input].any() && !params.sample_sheet){
         INPUT_TONLY()
         ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet)
         if (params.vc){
@@ -125,7 +124,7 @@ workflow {
     }
 
     //Variant Calling from BAM-Tumor Only Mode
-    if (params.bam_input && !params.sample_sheet){
+    if ([params.bam_input,params.file_input].any() && !params.sample_sheet){
         INPUT_TONLY_BAM()
         if (params.vc){
             VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet)
diff --git a/nextflow.config b/nextflow.config
index 3bc3a3f..45ba2c3 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -33,30 +33,12 @@ params {
     vep_cache = "/fdb/VEP/102/cache"
 
     //SUB WORKFLOWS to SPLIT
-    align=null
     gl=null
     vc=null
     sv=null
     cnv=null
     qc=null
-    qc_nogl=null
-    qc_gl=null
-    vc_bam=null
-    cnv_bam=null
-    sv_bam=null
-
-    //align_=null
-    //vc=null
-    sv_tumoronly=null
-    cnv_tumoronly=null
-
-    PIPE_BAMVC_TONLY=null
-    PIPE_TONLY_BAMVC=null
-    PIPE_TONLY_BAMSV=null
-    PIPE_TONLY_BAMCNV=null
-
-    PIPE_TONLY_QC=null
-
+    
     //Set all Inputs to null
     sample_sheet=null
     fastq_input=null

From 724f0edc916ea54d95858922f2fc919bf7b7b46c Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Fri, 15 Dec 2023 09:51:46 -0500
Subject: [PATCH 27/58] fix: rename slurm

---
 CITATION.cff                   | 6 +++---
 assets/slurm_header_biowulf.sh | 2 +-
 assets/slurm_header_frce.sh    | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/CITATION.cff b/CITATION.cff
index 3562005..e52b1c9 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -9,7 +9,7 @@ authors: # TODO: author names should match those in pyproject.toml
   - family-names: Koparde
     given-names: Vishal
     orcid: https://orcid.org/0000-0001-8978-8495
-title: "TOOL_NAME: insert one-line description here" # TODO: citation title should match pyproject.toml
-url: https://ccbr.github.io/TOOL_NAME/
-repository-code: https://github.com/CCBR/TOOL_NAME
+title: "LOGAN: whoLe genOme-sequencinG Analysis pipeliNe" # TODO: citation title should match pyproject.toml
+url: https://ccbr.github.io/LOGAN/
+repository-code: https://github.com/CCBR/LOGAN
 license: MIT
\ No newline at end of file
diff --git a/assets/slurm_header_biowulf.sh b/assets/slurm_header_biowulf.sh
index 65b61ab..ce79e61 100644
--- a/assets/slurm_header_biowulf.sh
+++ b/assets/slurm_header_biowulf.sh
@@ -3,7 +3,7 @@
 #SBATCH --mem=1g
 #SBATCH --time=1-00:00:00
 #SBATCH --parsable
-#SBATCH -J "tool_name"
+#SBATCH -J "LOGAN"
 #SBATCH --mail-type=BEGIN,END,FAIL
 #SBATCH --output "log/slurm_%j.log"
 #SBATCH --output "log/slurm_%j.log"
diff --git a/assets/slurm_header_frce.sh b/assets/slurm_header_frce.sh
index 665274e..957972a 100644
--- a/assets/slurm_header_frce.sh
+++ b/assets/slurm_header_frce.sh
@@ -3,7 +3,7 @@
 #SBATCH --mem=1g
 #SBATCH --time=1-00:00:00
 #SBATCH --parsable
-#SBATCH -J "tool_name"
+#SBATCH -J "LOGAN"
 #SBATCH --mail-type=BEGIN,END,FAIL
 #SBATCH --output "log/slurm_%j.log"
 #SBATCH --output "log/slurm_%j.log"

From 0d9b7c7c4d0e7c0c73ece1fc5fc5e747aab9703c Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Fri, 15 Dec 2023 10:57:58 -0500
Subject: [PATCH 28/58] fix: slurm submission changes

---
 Docker_hubmodules   | 19 -------------------
 conf/biowulf.config | 14 ++++++++------
 conf/ci_stub.config |  7 ++++---
 conf/frce.config    | 14 ++++++++------
 4 files changed, 20 insertions(+), 34 deletions(-)
 delete mode 100644 Docker_hubmodules

diff --git a/Docker_hubmodules b/Docker_hubmodules
deleted file mode 100644
index 90093ba..0000000
--- a/Docker_hubmodules
+++ /dev/null
@@ -1,19 +0,0 @@
-## Compile list of Modules to 
-    module=['vcf2maf/1.6.21','VEP/102']
-    module=['fastq_screen/0.15.2','bowtie/2-2.5.1']
-    module=['kraken/2.1.2', 'kronatools/2.8']
-    module=['fastqc/0.11.9']
-    module=['qualimap/2.2.1','java/12.0.1']
-    module=['samtools/1.16.1']
-    module=['vcftools/0.1.16']
-    module=['picard/2.20.8']
-    module=['bcftools/1.9']
-    module=['GATK/4.2.0.0']
-    module=["snpEff/4.3t"]
-    module=['multiqc/1.11']
-    module=['GATK/3.8-1']
-    module=['bwa-mem2/2.2.1','samblaster/0.1.26','samtools/1.15.1']
-    module=['fastp/0.23.2']
-
-
-
diff --git a/conf/biowulf.config b/conf/biowulf.config
index 78b3f05..584d846 100644
--- a/conf/biowulf.config
+++ b/conf/biowulf.config
@@ -26,9 +26,11 @@ singularity {
 
 env.SINGULARITY_CACHEDIR = "/data/CCBR_Pipeliner/SIFS"
 
-process.clusterOptions = ' --gres=lscratch:200 '
-process.scratch = '/lscratch/$SLURM_JOBID'
-process.stageInMode = 'symlink'
-process.stageOutMode = 'rsync'
-// for running pipeline on group sharing data directory, this can avoid inconsistent files timestamps
-process.cache = 'lenient'
+process {
+    clusterOptions = ' --gres=lscratch:200 '
+    scratch = '/lscratch/$SLURM_JOBID'
+    stageInMode = 'symlink'
+    stageOutMode = 'rsync'
+    // for running pipeline on group sharing data directory, this can avoid inconsistent files timestamps
+    cache = 'lenient'
+}
\ No newline at end of file
diff --git a/conf/ci_stub.config b/conf/ci_stub.config
index af76ab6..f882c66 100644
--- a/conf/ci_stub.config
+++ b/conf/ci_stub.config
@@ -15,7 +15,8 @@ params {
 process {
     cpus = 1
     memory = '1.GB'
-        singularity {
-                enabled = false
-        }
+    scratch = false
+}
+singularity { //top-level scope, as in biowulf.config; nested inside process{} it does not configure the container engine
+    enabled = false
 }
diff --git a/conf/frce.config b/conf/frce.config
index 4f132a8..bd0614c 100644
--- a/conf/frce.config
+++ b/conf/frce.config
@@ -19,11 +19,13 @@ singularity {
     envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH'
 }
 
-process.scratch = null // TODO
+process {
+    scratch = null // TODO
 
-process.stageInMode = 'symlink'
-process.stageOutMode = 'rsync'
+    stageInMode = 'symlink'
+    stageOutMode = 'rsync'
 
-// for running pipeline on group sharing data directory, this can avoid inconsistent files timestamps
-process.cache = 'lenient'
-}
+    // for running pipeline on group sharing data directory, this can avoid inconsistent files timestamps
+    cache = 'lenient'
+
+}
\ No newline at end of file

From 699cc09ed04823b3289dae9caffd0ce5db54548e Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Fri, 15 Dec 2023 12:24:58 -0500
Subject: [PATCH 29/58] fix: unpaired mode

---
 subworkflows/local/workflows.nf | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf
index 875c317..6dd79f5 100644
--- a/subworkflows/local/workflows.nf
+++ b/subworkflows/local/workflows.nf
@@ -374,6 +374,11 @@ workflow CNVmouse {
         //FREEC Paired Mode
         bamwithsample | freec_paired
 
+        //FREEC Unpaired Mode
+        bamwithsample 
+            | map{tname,tumor,tbai,nname,norm,nbai->tuple(tname,tumor,tbai)}
+            | freec
+
 }
 
 workflow CNVhuman {

From 0467eb455f692ead24be56a2d912b315dc50f756 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Fri, 15 Dec 2023 16:50:12 -0500
Subject: [PATCH 30/58] fix: singularity changes

---
 conf/biowulf.config     |  3 ++-
 conf/interactive.config | 10 ++++++++++
 conf/modules.config     | 20 ++++++++++++--------
 main.nf                 |  7 +++++--
 nextflow.config         |  3 +--
 5 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/conf/biowulf.config b/conf/biowulf.config
index 584d846..a679cf1 100644
--- a/conf/biowulf.config
+++ b/conf/biowulf.config
@@ -21,7 +21,8 @@ singularity {
     enabled = true
     autoMounts = true
     cacheDir = "/data/CCBR_Pipeliner/SIFS"
-    envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH'
+    envWhitelist = 'https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH'
+    runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/nousomedr/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb'
 }
 
 env.SINGULARITY_CACHEDIR = "/data/CCBR_Pipeliner/SIFS"
diff --git a/conf/interactive.config b/conf/interactive.config
index 3f11c04..725d1ae 100644
--- a/conf/interactive.config
+++ b/conf/interactive.config
@@ -3,5 +3,15 @@ params {
     max_memory = '220 GB'
     max_cpus = 56
     max_time = '12 h'
+
+    
 }
 process.scratch = false
+
+
+singularity {
+    enabled = true
+    autoMounts = true
+    cacheDir = "/data/CCBR_Pipeliner/SIFS"
+    envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH'
+}
\ No newline at end of file
diff --git a/conf/modules.config b/conf/modules.config
index 48cf213..4c3522c 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -1,14 +1,15 @@
 process {
 
-    publishDir = [
-        path: { task.label ? "${params.outdir}/${task.label.findAll { !it.startsWith('process_') & !it.startsWith('error_') }.join('/')}/${task.process.tokenize(':')[-1].toLowerCase()}" : "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" },
-        mode: params.publish_dir_mode,
-        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-    ]
+    //publishDir = [
+     //   path: { task.label ? "${params.outdir}/${task.label.findAll { !it.startsWith('process_') & !it.startsWith('error_') }.join('/')}/${task.process.tokenize(':')[-1].toLowerCase()}" : "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" },
+      //  mode: params.publish_dir_mode,
+       // saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+   // ]
 
     errorStrategy = 'finish'
 
-    withName: sequenza {
+    withName:'sequenza' {
+        container = 'dnousome/ccbr_logan_base:v0.3.0'
         publishDir = [
             path: { "${params.outdir}/cnv/sequenza" },
             mode: 'copy'
@@ -16,14 +17,17 @@ process {
     }
 
 
-    withName: freec_paired {
+    withName: 'freec_paired' {
         publishDir = [
             path: { "${params.outdir}/cnv/freec" },
             mode: 'copy'
         ]
+        container = 'dnousome/ccbr_logan_base:v0.3.0' 
+
     }
 
-    withName: freec {
+    withName:'freec' {
+        container = 'dnousome/ccbr_logan_base:v0.3.0' 
         publishDir = [
             path: { "${params.outdir}/cnv/freec" },
             mode: 'copy'
diff --git a/main.nf b/main.nf
index 01bf4a5..0717c5a 100644
--- a/main.nf
+++ b/main.nf
@@ -39,7 +39,10 @@ workflow.onComplete {
 //Final Workflow
 workflow {
     //Inputs
-    if ([params.fastq_input,params.file_input].any() && params.sample_sheet){
+    //if (params.file_input){
+        
+    //}
+    if ([params.fastq_input,params.file_input].any() && params.sample_shee && !params.bam){
         INPUT()
         ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet)
     //Germline
@@ -74,7 +77,7 @@ workflow {
     }
     
     //TUMOR-NOMRAL BAM INPUT
-    if ([params.bam_input,params.file_input].any() && params.sample_sheet){
+    if ([params.bam_input,params.file_input].any() && params.sample_sheet && params.bam){
         INPUT_BAM()
         if (params.vc){
             VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet)
diff --git a/nextflow.config b/nextflow.config
index 45ba2c3..e0f0e16 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -7,11 +7,10 @@ manifest {
     mainScript = "main.nf"
 }
 
-    
+includeConfig 'conf/containers.config'
 includeConfig 'conf/genomes.config'
 includeConfig 'conf/base.config'
 includeConfig 'conf/modules.config'
-includeConfig 'conf/containers.config'
 
 
 params { 

From db723ef0e0f7e20fbab732050b1b2a43ed92e428 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Mon, 18 Dec 2023 15:49:21 -0500
Subject: [PATCH 31/58] fix: additional changes to nf template

---
 conf/ci_stub.config                   |  2 ++
 docker/logan_base/Dockerfile          |  7 +++++--
 main.nf                               | 19 ++++++++++---------
 nextflow.config                       |  3 ++-
 subworkflows/local/workflows.nf       | 20 ++++++++++++++++++--
 subworkflows/local/workflows_tonly.nf |  8 ++++----
 6 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/conf/ci_stub.config b/conf/ci_stub.config
index f882c66..aa74e29 100644
--- a/conf/ci_stub.config
+++ b/conf/ci_stub.config
@@ -20,3 +20,5 @@ process {
         enabled = false
     }
 }
+
+stubRun = true
\ No newline at end of file
diff --git a/docker/logan_base/Dockerfile b/docker/logan_base/Dockerfile
index 939b44a..844a8b7 100644
--- a/docker/logan_base/Dockerfile
+++ b/docker/logan_base/Dockerfile
@@ -70,7 +70,7 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get install -y \
       apt-transport-https \
       software-properties-common
 
-# Install R (4.0) -- and R packages
+# Install R (4.2.2) -- and R packages
 # ggplot2  dplyr  plotly  htmlwidgets  tidyr and a few extras
 # For more information, check out: https://cran.r-project.org/bin/linux/ubuntu/
 RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 \
@@ -85,12 +85,15 @@ RUN Rscript -e 'install.packages(c("argparse"), repos="http://cran.r-project.org
 RUN Rscript -e 'install.packages(c("flexdashboard"), repos="http://cran.r-project.org")'
 RUN Rscript -e 'BiocManager::install(c("rtracklayer"))'
 
+
 # Install Sequenza-Utils/3.0.0 and Sequenza
 # Requires R, Python, SAMtools, tabix (already satisfied)
 # https://cran.r-project.org/web/packages/sequenza/vignettes/sequenza.html#getting-started
+##Install Old version of IOtools for parallel processing
 RUN pip3 install --upgrade pip \
 	  && pip3 install sequenza-utils \
-      && Rscript -e 'remotes::install_github("ShixiangWang/copynumber"); remotes::install_github("cran/sequenza")'
+      && Rscript -e 'remotes::install_github("ShixiangWang/copynumber"); remotes::install_github("cran/sequenza")' \
+      && Rscript -e 'remotes::install_version("iotools",version="0.3-2")'
 
 # Install Control-FREEC/v11.6 and additional dependencies
 # Requires R, samtools, bedtools, sambamba (already satisfied)
diff --git a/main.nf b/main.nf
index 0717c5a..ae754d1 100644
--- a/main.nf
+++ b/main.nf
@@ -16,7 +16,7 @@ log.info """\
 
 
 
-include {INPUT; INPUT_BAM; ALIGN; GL;
+include {DETERMINEBAM; INPUT; INPUT_BAM; ALIGN; GL;
     VC; SV; CNVmouse; CNVhuman;
     QC_GL; QC_NOGL} from "./subworkflows/local/workflows.nf"
 
@@ -38,11 +38,9 @@ workflow.onComplete {
 
 //Final Workflow
 workflow {
-    //Inputs
-    //if (params.file_input){
-        
-    //}
-    if ([params.fastq_input,params.file_input].any() && params.sample_shee && !params.bam){
+    DETERMINEBAM()
+    if ([params.fastq_input,params.file_input].any() && params.sample_sheet && !params.BAMINPUT){
+        println "Tumor-Normal FASTQ"
         INPUT()
         ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet)
     //Germline
@@ -77,7 +75,8 @@ workflow {
     }
     
     //TUMOR-NOMRAL BAM INPUT
-    if ([params.bam_input,params.file_input].any() && params.sample_sheet && params.bam){
+    if ([params.bam_input,params.file_input].any() && params.sample_sheet && BAMINPUT){
+        println "Tumor-Normal with BAMs"
         INPUT_BAM()
         if (params.vc){
             VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet)
@@ -100,7 +99,8 @@ workflow {
     }  
     
     ///Tumor Only Pipelines
-    if ([params.fastq_input,params.file_input].any() && !params.sample_sheet){
+    if ([params.fastq_input,params.file_input].any() && !params.sample_sheet && !params.BAMINPUT){
+        println "Tumor-Only FASTQ"
         INPUT_TONLY()
         ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet)
         if (params.vc){
@@ -127,7 +127,8 @@ workflow {
     }
 
     //Variant Calling from BAM-Tumor Only Mode
-    if ([params.bam_input,params.file_input].any() && !params.sample_sheet){
+    if ([params.bam_input,params.file_input].any() && !params.sample_sheet && params.BAMINPUT){
+        println "Tumor-Only BAM"
         INPUT_TONLY_BAM()
         if (params.vc){
             VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet)
diff --git a/nextflow.config b/nextflow.config
index e0f0e16..70fd5a7 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -37,11 +37,12 @@ params {
     sv=null
     cnv=null
     qc=null
-    
+    bam=null
     //Set all Inputs to null
     sample_sheet=null
     fastq_input=null
     bam_input=null
+    BAMINPUT=null
     file_input=null
 
     publish_dir_mode = 'symlink'
diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf
index 6dd79f5..3e27ff2 100644
--- a/subworkflows/local/workflows.nf
+++ b/subworkflows/local/workflows.nf
@@ -52,6 +52,21 @@ include {splitinterval} from '../../modules/local/splitbed.nf'
 
 
 
+workflow DETERMINEBAM {
+    if(params.bam_input){
+        params.BAMINPUT=true
+    }else if(params.file_input){
+            file(params.file_input).text
+                        //.splitCsv(header: false, sep: "\t", strip:true)
+                       // .map{ sample,bam,bai ->
+                        //if (bam[0] =~ /.bam/){
+                         //   params.BAMINPUT=
+                        //}
+                        //}
+    }
+
+}
+
 workflow INPUT {
 
     if(params.fastq_input){
@@ -550,12 +565,13 @@ workflow INPUT_BAM {
         if (bamcheck1.size()>0){
             baminputonly=Channel.fromPath(params.bam_input)
            .map{it-> tuple(it.simpleName,it,file("${it}.bai"))}
-        }
-              else if (bamcheck2.size()>0){
+        }else if (bamcheck2.size()>0){
             bai=Channel.from(bamcheck2).map{it -> tuple(it.simpleName,it)}
             baminputonly=Channel.fromPath(params.bam_input)
            .map{it-> tuple(it.simpleName,it)}
            .join(bai)
+        }else if (bamcheck1.size()==0 && bamcheck2.size()==0){
+            println "Missing BAM Index"
         }
 
     }else if(params.file_input) {
diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf
index 57307c4..c73803a 100644
--- a/subworkflows/local/workflows_tonly.nf
+++ b/subworkflows/local/workflows_tonly.nf
@@ -1,6 +1,5 @@
 //All Worksflows in One Place
 // TODO split subworkflows out into one per file  
-
 // TODO: this line should be moved to within a subworkflow or the main workflow
 intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file')
 
@@ -93,7 +92,7 @@ workflow ALIGN_TONLY {
         splitinterval(intervalbedin)
     
     bwamem2(fastp.out)
-    //indelrealign(bwamem2.out)
+    //indelrealign(bwamem2.out) Consider indel realignment using ABRA?
 
     bqsrbambyinterval=bwamem2.out.combine(splitinterval.out.flatten())
 
@@ -335,12 +334,13 @@ workflow INPUT_TONLY_BAM {
         if (bamcheck1.size()>0){
             baminputonly=Channel.fromPath(params.bam_input)
                 | map{it-> tuple(it.simpleName,it,file("${it}.bai"))} 
-        }
-        else if (bamcheck2.size()>0){
+        }else if (bamcheck2.size()>0){
             bai=Channel.from(bamcheck2).map{it -> tuple(it.simpleName,it)}
             baminputonly=Channel.fromPath(params.bam_input)
             | map{it-> tuple(it.simpleName,it)}
             | join(bai)
+        }else if (bamcheck1.size()==0 && bamcheck2.size()==0 ){
+            println "Missing BAM Index"
         }
 
         sample_sheet=baminputonly.map{samplename,bam,bai -> tuple (

From 2c2eae94b63d3596a35b97195716fd206c74343b Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Tue, 19 Dec 2023 12:55:33 -0500
Subject: [PATCH 32/58] fix: cnv freec changes

---
 ..._paired.pl => make_freec_genome_paired.pl} | 11 ++--
 conf/modules.config                           |  4 +-
 modules/local/copynumber.nf                   | 61 +++++++++++++------
 nextflow.config                               |  2 +-
 4 files changed, 53 insertions(+), 25 deletions(-)
 rename bin/{freec_paired.pl => make_freec_genome_paired.pl} (95%)

diff --git a/bin/freec_paired.pl b/bin/make_freec_genome_paired.pl
similarity index 95%
rename from bin/freec_paired.pl
rename to bin/make_freec_genome_paired.pl
index 161e24e..474dfaf 100644
--- a/bin/freec_paired.pl
+++ b/bin/make_freec_genome_paired.pl
@@ -26,18 +26,19 @@
 print C "chrFiles = $chrFiles\n";
 print C "minimalSubclonePresence = 20\nmaxThreads = 8\n";
 print C "outputDir = $ARGV[0]\n\n";
- 
+
 print C '[sample]' . "\n\n";
- 
+
 print C "mateFile = $tumormateFile\n";
 print C "inputFormat = BAM\nmateOrientation = FR\n\n";
 
-print C '[BAF]' . "\n\n";
+print C '[control]' . "\n\n";
 
 print C "mateFile = $controlmateFile\n";
 print C "inputFormat = BAM\nmateOrientation = FR\n\n";
- 
+
+print C '[BAF]' . "\n\n";
 print C "makePileup = $makePileup\n";
 print C "fastaFile = $fastaFile\n";
 print C "minimalCoveragePerPosition = 20\nminimalQualityPerPosition = 20\n";
-print C "SNPfile = $SNPfile";
+print C "SNPfile = $SNPfile";
\ No newline at end of file
diff --git a/conf/modules.config b/conf/modules.config
index 4c3522c..185e4ef 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -19,7 +19,7 @@ process {
 
     withName: 'freec_paired' {
         publishDir = [
-            path: { "${params.outdir}/cnv/freec" },
+            path: { "${params.outdir}/cnv/freec_paired" },
             mode: 'copy'
         ]
         container = 'dnousome/ccbr_logan_base:v0.3.0' 
@@ -29,7 +29,7 @@ process {
     withName:'freec' {
         container = 'dnousome/ccbr_logan_base:v0.3.0' 
         publishDir = [
-            path: { "${params.outdir}/cnv/freec" },
+            path: { "${params.outdir}/cnv/freec_unpaired" },
             mode: 'copy'
         ]
     }
diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf
index 757465a..fb1c9eb 100644
--- a/modules/local/copynumber.nf
+++ b/modules/local/copynumber.nf
@@ -123,11 +123,22 @@ process sequenza {
 
 process freec_paired {
     label 'process_highcpu'
+    publishDir("${outdir}/cnv/freec_paired", mode: 'copy')
 
     input:
-        tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai)
+        tuple val(tumorname), path(tumor), path(tumorbai),
+        val(normalname), path(normal), path(normalbai)
 
-    shell: """
+    output:
+        tuple val(tumorname), val(normalname),
+        path("${tumorname}_vs_${normalname}.bam_CNVs.p.value.txt"),
+        path("${tumorname}_vs_${normalname}.bam_ratio.txt"),
+        path("${tumorname}_vs_${normalname}.bam_BAF.txt"),
+        path("${tumorname}_vs_${normalname}.bam_ratio.txt.log2.png"),
+        path("${tumorname}_vs_${normalname}.bam_ratio.txt.png")
+
+    shell:
+    """
 
     perl $FREECPAIR_SCRIPT \
         . \
@@ -150,15 +161,21 @@ process freec_paired {
     cat $FREECPLOT | \
         R --slave \
         --args 2 \
-        ${tumorname}_vs_${normalname}.bam_ratio.txt \
-        ${tumorname}_vs_${normalname}.bam_BAF.txt
+        ${tumorname}.bam_ratio.txt \
+        ${tumorname}.bam_BAF.txt
 
-    """      
+    mv ${tumorname}.bam_CNVs.p.value.txt ${tumorname}_vs_${normalname}.bam_CNVs.p.value.txt
+    mv ${tumorname}.bam_ratio.txt ${tumorname}_vs_${normalname}.bam_ratio.txt
+    mv ${tumorname}.bam_BAF.txt ${tumorname}_vs_${normalname}.bam_BAF.txt
+    mv ${tumorname}.bam_ratio.txt.log2.png ${tumorname}_vs_${normalname}.bam_ratio.txt.log2.png
+    mv ${tumorname}.bam_ratio.txt.png ${tumorname}_vs_${normalname}.bam_ratio.txt.png
+
+    """
 
     stub:
     """
-    touch ${tumorname}_vs_${normalname}.bam_CNVs.p.value.txt  
-    touch ${tumorname}_vs_${normalname}.bam_ratio.txt 
+    touch ${tumorname}_vs_${normalname}.bam_CNVs.p.value.txt
+    touch ${tumorname}_vs_${normalname}.bam_ratio.txt
     touch ${tumorname}_vs_${normalname}.bam_BAF.txt
     touch ${tumorname}_vs_${normalname}.bam_ratio.txt.log2.png
     touch ${tumorname}_vs_${normalname}.bam_ratio.txt.png
@@ -169,10 +186,20 @@ process freec_paired {
 
 process freec {
     label 'process_mid'
+    publishDir("${outdir}/cnv/freec_unpaired", mode: 'copy')
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai)
 
+    output:
+        tuple val(tumorname),
+        path("${tumorname}.bam_CNVs.p.value.txt"),
+        path("${tumorname}.bam_ratio.txt"),
+        path("${tumorname}.bam_BAF.txt"),
+        path("${tumorname}.bam_ratio.txt.log2.png"),
+        path("${tumorname}.bam_ratio.txt.png")
+
+
     shell: """
 
     perl $FREECSCRIPT \
@@ -189,24 +216,24 @@ process freec {
 
     cat $FREECSIGNIFICANCE | \
         R --slave \
-        --args ${tumor}_CNVs \
-        ${tumor}_ratio.txt
+        --args ${tumorname}_CNVs \
+        ${tumorname}_ratio.txt
 
     cat $FREECPLOT | \
         R --slave \
         --args 2 \
-        ${tumor}_ratio.txt \
-        ${tumor}_BAF.txt
+        ${tumorname}_ratio.txt \
+        ${tumorname}_BAF.txt
 
-    """      
+    """
 
     stub:
     """
-    touch ${tumor}_CNVs.p.value.txt  
-    touch ${tumor}_ratio.txt 
-    touch ${tumor}_BAF.txt 
-    touch ${tumor}_ratio.txt.log2.png
-    touch ${tumor}_ratio.txt.png
+    touch ${tumorname}_CNVs.p.value.txt
+    touch ${tumorname}_ratio.txt
+    touch ${tumorname}_BAF.txt
+    touch ${tumorname}_ratio.txt.log2.png
+    touch ${tumorname}_ratio.txt.png
 
     """
 }
diff --git a/nextflow.config b/nextflow.config
index 70fd5a7..9dabc54 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -23,7 +23,7 @@ params {
     script_ancestry = "${projectDir}/bin/sampleCompareAncestoryPlots.R"
     script_sequenza = "${projectDir}/bin/run_sequenza.R"
     script_freec = "${projectDir}/bin/make_freec_genome.pl"
-    script_freecpaired = "${projectDir}/bin/freec_paired.pl"
+    script_freecpaired = "${projectDir}/bin/make_freec_genome_paired.pl"
     freec_significance = "${projectDir}/bin/assess_significance.R"
     freec_plot = "${projectDir}/bin/makeGraph.R"
     lofreq_convert = "${projectDir}/bin/add_gt_lofreq.sh"

From bc6d0362ed2f5a9b5131284baadd5f61c3ba6dcc Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Tue, 19 Dec 2023 13:55:06 -0500
Subject: [PATCH 33/58] fix: simplify file inputs

---
 main.nf                               | 11 +++++------
 subworkflows/local/workflows.nf       |  8 ++++----
 subworkflows/local/workflows_tonly.nf |  8 ++++----
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/main.nf b/main.nf
index ae754d1..2c8367b 100644
--- a/main.nf
+++ b/main.nf
@@ -10,7 +10,7 @@ log.info """\
          genome: ${params.genome}
          outdir: ${params.outdir}
          Sample Sheet: ${params.sample_sheet}
-         Samples: ${params.fastq_input} ${params.file_input} ${params.bam_input}
+         Samples: ${params.fastq_input} ${params.fastq_file_input} ${params.bam_input} ${params.bam_file_input} 
          """
          .stripIndent()
 
@@ -38,8 +38,7 @@ workflow.onComplete {
 
 //Final Workflow
 workflow {
-    DETERMINEBAM()
-    if ([params.fastq_input,params.file_input].any() && params.sample_sheet && !params.BAMINPUT){
+        if ([params.fastq_input,params.fastq_file_input].any() && params.sample_sheet){
         println "Tumor-Normal FASTQ"
         INPUT()
         ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet)
@@ -75,7 +74,7 @@ workflow {
     }
     
     //TUMOR-NOMRAL BAM INPUT
-    if ([params.bam_input,params.file_input].any() && params.sample_sheet && BAMINPUT){
+    if ([params.bam_input,params.bam_file_input].any() && params.sample_sheet){
         println "Tumor-Normal with BAMs"
         INPUT_BAM()
         if (params.vc){
@@ -99,7 +98,7 @@ workflow {
     }  
     
     ///Tumor Only Pipelines
-    if ([params.fastq_input,params.file_input].any() && !params.sample_sheet && !params.BAMINPUT){
+    if ([params.fastq_input,params.fastq_file_input].any() && !params.sample_sheet){
         println "Tumor-Only FASTQ"
         INPUT_TONLY()
         ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet)
@@ -127,7 +126,7 @@ workflow {
     }
 
     //Variant Calling from BAM-Tumor Only Mode
-    if ([params.bam_input,params.file_input].any() && !params.sample_sheet && params.BAMINPUT){
+    if ([params.bam_input,params.bam_file_input].any() && !params.sample_sheet){
         println "Tumor-Only BAM"
         INPUT_TONLY_BAM()
         if (params.vc){
diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf
index 3e27ff2..766a41a 100644
--- a/subworkflows/local/workflows.nf
+++ b/subworkflows/local/workflows.nf
@@ -71,8 +71,8 @@ workflow INPUT {
 
     if(params.fastq_input){
         fastqinput=Channel.fromFilePairs(params.fastq_input)
-    }else if(params.file_input) {
-        fastqinput=Channel.fromPath(params.file_input)
+    }else if(params.fastq_file_input) {
+        fastqinput=Channel.fromPath(params.fastq_file_input)
                         .splitCsv(header: false, sep: "\t", strip:true)
                         .map{ sample,fq1,fq2 ->
                         tuple(sample, tuple(file(fq1),file(fq2)))
@@ -574,8 +574,8 @@ workflow INPUT_BAM {
             println "Missing BAM Index"
         }
 
-    }else if(params.file_input) {
-        baminputonly=Channel.fromPath(params.file_input)
+    }else if(params.bam_file_input) {
+        baminputonly=Channel.fromPath(params.bam_file_input)
                         .splitCsv(header: false, sep: "\t", strip:true)
                         .map{ sample,bam,bai  ->
                         tuple(sample, file(bam),file(bai))
diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf
index c73803a..9f19e1a 100644
--- a/subworkflows/local/workflows_tonly.nf
+++ b/subworkflows/local/workflows_tonly.nf
@@ -51,8 +51,8 @@ workflow INPUT_TONLY {
     if(params.fastq_input){
         fastqinput=Channel.fromFilePairs(params.fastq_input)
 
-    }else if(params.file_input) {
-        fastqinput=Channel.fromPath(params.file_input)
+    }else if(params.fastq_file_input) {
+        fastqinput=Channel.fromPath(params.fastq_file_input)
                         .splitCsv(header: false, sep: "\t", strip:true)
                         .map{ sample,fq1,fq2 -> 
                         tuple(sample, tuple(file(fq1),file(fq2)))
@@ -346,8 +346,8 @@ workflow INPUT_TONLY_BAM {
         sample_sheet=baminputonly.map{samplename,bam,bai -> tuple (
              samplename)}
 
-    }else if(params.file_input) {
-        baminputonly=Channel.fromPath(params.file_input)
+    }else if(params.bam_file_input) {
+        baminputonly=Channel.fromPath(params.bam_file_input)
                         .splitCsv(header: false, sep: "\t", strip:true)
                         .map{ sample,bam,bai  -> 
                         tuple(sample, file(bam),file(bai))

From 7484751f6504228d09dabe95950a5349d550f4bd Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Tue, 19 Dec 2023 13:58:20 -0500
Subject: [PATCH 34/58] feat: add file input lists

---
 nextflow.config | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/nextflow.config b/nextflow.config
index 9dabc54..9d4783f 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -40,10 +40,15 @@ params {
     bam=null
     //Set all Inputs to null
     sample_sheet=null
+    
+    fastq_file_input=null
+    bam_file_input=null
+    file_input=null
+
     fastq_input=null
     bam_input=null
+
     BAMINPUT=null
-    file_input=null
 
     publish_dir_mode = 'symlink'
     outdir = 'results'

From af3f7eab013d4e09cde4dfeb2eaf3c8acb8269f4 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Wed, 20 Dec 2023 11:05:59 -0500
Subject: [PATCH 35/58] fix: rename cnv output

---
 modules/local/copynumber.nf | 63 ++++++++++++++++++++++---------------
 1 file changed, 37 insertions(+), 26 deletions(-)

diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf
index fb1c9eb..be691a9 100644
--- a/modules/local/copynumber.nf
+++ b/modules/local/copynumber.nf
@@ -121,6 +121,7 @@ process sequenza {
 
 }
 
+
 process freec_paired {
     label 'process_highcpu'
     publishDir("${outdir}/cnv/freec_paired", mode: 'copy')
@@ -131,11 +132,11 @@ process freec_paired {
 
     output:
         tuple val(tumorname), val(normalname),
-        path("${tumorname}_vs_${normalname}.bam_CNVs.p.value.txt"),
-        path("${tumorname}_vs_${normalname}.bam_ratio.txt"),
-        path("${tumorname}_vs_${normalname}.bam_BAF.txt"),
-        path("${tumorname}_vs_${normalname}.bam_ratio.txt.log2.png"),
-        path("${tumorname}_vs_${normalname}.bam_ratio.txt.png")
+        path("${tumorname}_vs_${normalname}_CNVs.p.value.txt"),
+        path("${tumorname}_vs_${normalname}_ratio.txt"),
+        path("${tumorname}_vs_${normalname}_BAF.txt"),
+        path("${tumorname}_vs_${normalname}_ratio.txt.log2.png"),
+        path("${tumorname}_vs_${normalname}_ratio.txt.png")
 
     shell:
     """
@@ -161,24 +162,26 @@ process freec_paired {
     cat $FREECPLOT | \
         R --slave \
         --args 2 \
-        ${tumorname}.bam_ratio.txt \
-        ${tumorname}.bam_BAF.txt
+        ${tumor}_ratio.txt \
+        ${tumor}_BAF.txt
 
-    mv ${tumorname}.bam_CNVs.p.value.txt ${tumorname}_vs_${normalname}.bam_CNVs.p.value.txt
-    mv ${tumorname}.bam_ratio.txt ${tumorname}_vs_${normalname}.bam_ratio.txt
-    mv ${tumorname}.bam_BAF.txt ${tumorname}_vs_${normalname}.bam_BAF.txt
-    mv ${tumorname}.bam_ratio.txt.log2.png ${tumorname}_vs_${normalname}.bam_ratio.txt.log2.png
-    mv ${tumorname}.bam_ratio.txt.png ${tumorname}_vs_${normalname}.bam_ratio.txt.png
+    mv ${tumor}_CNVs.p.value.txt ${tumorname}_vs_${normalname}_CNVs.p.value.txt
+    mv ${tumor}_ratio.txt ${tumorname}_vs_${normalname}_ratio.txt
+    mv ${tumor}_BAF.txt ${tumorname}_vs_${normalname}_BAF.txt
+    mv ${tumor}_BAF.txt.png ${tumorname}_vs_${normalname}_BAF.txt.png
+    mv ${tumor}_ratio.txt.log2.png ${tumorname}_vs_${normalname}_ratio.txt.log2.png
+    mv ${tumor}_ratio.txt.png ${tumorname}_vs_${normalname}_ratio.txt.png
 
     """
 
     stub:
     """
-    touch ${tumorname}_vs_${normalname}.bam_CNVs.p.value.txt
-    touch ${tumorname}_vs_${normalname}.bam_ratio.txt
-    touch ${tumorname}_vs_${normalname}.bam_BAF.txt
-    touch ${tumorname}_vs_${normalname}.bam_ratio.txt.log2.png
-    touch ${tumorname}_vs_${normalname}.bam_ratio.txt.png
+    touch ${tumorname}_vs_${normalname}_CNVs.p.value.txt
+    touch ${tumorname}_vs_${normalname}_ratio.txt
+    touch ${tumorname}_vs_${normalname}_BAF.txt
+    touch ${tumorname}_vs_${normalname}_BAF.txt.png
+    touch ${tumorname}_vs_${normalname}_ratio.txt.log2.png
+    touch ${tumorname}_vs_${normalname}_ratio.txt.png
 
     """
 }
@@ -193,11 +196,11 @@ process freec {
 
     output:
         tuple val(tumorname),
-        path("${tumorname}.bam_CNVs.p.value.txt"),
-        path("${tumorname}.bam_ratio.txt"),
-        path("${tumorname}.bam_BAF.txt"),
-        path("${tumorname}.bam_ratio.txt.log2.png"),
-        path("${tumorname}.bam_ratio.txt.png")
+        path("${tumorname}_CNVs.p.value.txt"),
+        path("${tumorname}_ratio.txt"),
+        path("${tumorname}_BAF.txt"),
+        path("${tumorname}_ratio.txt.log2.png"),
+        path("${tumorname}_ratio.txt.png")
 
 
     shell: """
@@ -216,14 +219,21 @@ process freec {
 
     cat $FREECSIGNIFICANCE | \
         R --slave \
-        --args ${tumorname}_CNVs \
-        ${tumorname}_ratio.txt
+        --args ${tumor}_CNVs \
+        ${tumor}_ratio.txt
 
     cat $FREECPLOT | \
         R --slave \
         --args 2 \
-        ${tumorname}_ratio.txt \
-        ${tumorname}_BAF.txt
+        ${tumor}_ratio.txt \
+        ${tumor}_BAF.txt
+
+    mv ${tumor}_CNVs.p.value.txt ${tumorname}_CNVs.p.value.txt
+    mv ${tumor}_ratio.txt ${tumorname}_ratio.txt
+    mv ${tumor}_BAF.txt ${tumorname}_BAF.txt
+    mv ${tumor}_BAF.txt.png ${tumorname}_BAF.txt.png
+    mv ${tumor}_ratio.txt.log2.png ${tumorname}_ratio.txt.log2.png
+    mv ${tumor}_ratio.txt.png ${tumorname}_ratio.txt.png
 
     """
 
@@ -232,6 +242,7 @@ process freec {
     touch ${tumorname}_CNVs.p.value.txt
     touch ${tumorname}_ratio.txt
     touch ${tumorname}_BAF.txt
+    touch ${tumorname}_BAF.txt.png
     touch ${tumorname}_ratio.txt.log2.png
     touch ${tumorname}_ratio.txt.png
 

From 90a05520159a79c54f97e0bdaee31f4d9fdeb87e Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Tue, 9 Jan 2024 16:43:07 -0500
Subject: [PATCH 36/58] fix: resources location

---
 conf/genomes.config | 17 +++++++----------
 nextflow.config     | 32 ++++++++++++++++----------------
 2 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/conf/genomes.config b/conf/genomes.config
index 2ee6cdc..3d0843a 100644
--- a/conf/genomes.config
+++ b/conf/genomes.config
@@ -1,11 +1,11 @@
 params {
     genomes {
         'hg38' {
-            genome = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta" 
-            genomefai = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta.fai" 
+            genome = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta"
+            genomefai = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta.fai"
             bwagenome= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.fasta"
             genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict"
-            wgsregion = "/data/nousomedr/annotation/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" 
+            wgsregion = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list"
             intervals= "${projectDir}/assets/hg38_v0_wgs_calling_regions.hg38.bed"
             //millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
             //shapeitindel =  "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) //
@@ -14,7 +14,7 @@ params {
             dbsnp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz"
             dbsnp_indel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_indel.vcf"
             gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' // /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz
-            pon = "/data/nousomedr/wgs/updatedpon.vcf.gz"    //pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon} 
+            pon = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PON/updatedpon.vcf.gz"    //pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon}
             kgp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/1000G_phase1.snps.high_confidence.hg38.vcf.gz"
             KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2"
             snpeff_genome = "GRCh38.86"
@@ -29,11 +29,11 @@ params {
             octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest"
             SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz"
             chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM']
-        }    
+        }
 
         'mm10' {
-            genome = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa" // file(params.genome) 
-            genomefai = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa.fai" // file(params.genome) 
+            genome = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa" // file(params.genome)
+            genomefai = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa.fai" // file(params.genome)
             bwagenome= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwaindex/genome.fa"
             genomedict= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.dict"
             intervals="/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/mm10_wgsregions.bed"
@@ -66,6 +66,3 @@ params {
         }
     }
 }
-
-
-
diff --git a/nextflow.config b/nextflow.config
index 9d4783f..3a3392d 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -13,7 +13,7 @@ includeConfig 'conf/base.config'
 includeConfig 'conf/modules.config'
 
 
-params { 
+params {
 
     fastq_screen_conf = "${projectDir}/conf/fastq_screen.conf"
     get_flowcell_lanes = "${projectDir}/bin/scripts/flowcell_lane.py"
@@ -27,7 +27,7 @@ params {
     freec_significance = "${projectDir}/bin/assess_significance.R"
     freec_plot = "${projectDir}/bin/makeGraph.R"
     lofreq_convert = "${projectDir}/bin/add_gt_lofreq.sh"
-    split_regions = "24" //Number of regions to split by 
+    split_regions = "24" //Number of regions to split by
 
     vep_cache = "/fdb/VEP/102/cache"
 
@@ -40,7 +40,7 @@ params {
     bam=null
     //Set all Inputs to null
     sample_sheet=null
-    
+
     fastq_file_input=null
     bam_file_input=null
     file_input=null
@@ -72,25 +72,25 @@ profiles {
         autoMounts = true
         cacheDir = "$PWD/singularity"
         envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID'
-        runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/nousomedr/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb'
+        runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb'
         }
-    biowulf { 
-        includeConfig 'conf/biowulf.config' 
+    biowulf {
+        includeConfig 'conf/biowulf.config'
     }
-    frce { 
-        includeConfig 'conf/frce.config' 
+    frce {
+        includeConfig 'conf/frce.config'
     }
-    interactive { 
-        includeConfig 'conf/interactive.config' 
+    interactive {
+        includeConfig 'conf/interactive.config'
     }
-    slurm { 
-        includeConfig 'conf/slurm.config' 
+    slurm {
+        includeConfig 'conf/slurm.config'
     }
-    ci_stub { 
-        includeConfig 'conf/ci_stub.config' 
+    ci_stub {
+        includeConfig 'conf/ci_stub.config'
     }
 }
-  
+
 // Export these variables to prevent local Python/R libraries from conflicting with those in the container
 // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
 // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
@@ -104,7 +104,7 @@ profiles {
 // Capture exit codes from upstream processes when piping
 process.shell = ['/bin/bash', '-euo', 'pipefail']
 
-    
+
 
 
 

From 4206f0ce1537fed9748a4fb0ff4a056835795465 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Tue, 9 Jan 2024 16:46:19 -0500
Subject: [PATCH 37/58] fix: simplify bind paths

---
 conf/biowulf.config | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/conf/biowulf.config b/conf/biowulf.config
index a679cf1..77a06d5 100644
--- a/conf/biowulf.config
+++ b/conf/biowulf.config
@@ -22,7 +22,7 @@ singularity {
     autoMounts = true
     cacheDir = "/data/CCBR_Pipeliner/SIFS"
     envWhitelist = 'https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH'
-    runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/nousomedr/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb'
+    runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb'
 }
 
 env.SINGULARITY_CACHEDIR = "/data/CCBR_Pipeliner/SIFS"
@@ -34,4 +34,4 @@ process {
     stageOutMode = 'rsync'
     // for running pipeline on group sharing data directory, this can avoid inconsistent files timestamps
     cache = 'lenient'
-}
\ No newline at end of file
+}

From 9654866777794fcbc1c3afa1032b59a8a2f286af Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Tue, 9 Jan 2024 18:00:05 -0500
Subject: [PATCH 38/58] fix: edit all container paths

---
 conf/containers.config                 |   5 +-
 conf/interactive.config                |   6 +-
 conf/modules.config                    |  10 +-
 modules/local/copynumber.nf            |  50 +++--
 modules/local/germline.nf              |  53 +++---
 modules/local/qc.nf                    | 190 +++++++++----------
 modules/local/trim_align.nf            |  62 ++++---
 modules/local/variant_calling.nf       | 243 +++++++++++++------------
 modules/local/variant_calling_tonly.nf | 111 ++++++-----
 nextflow.config                        |   3 +-
 10 files changed, 396 insertions(+), 337 deletions(-)

diff --git a/conf/containers.config b/conf/containers.config
index 504d3b8..2ceaf3c 100644
--- a/conf/containers.config
+++ b/conf/containers.config
@@ -2,6 +2,9 @@
 params {
     containers {
         base = 'nciccbr/ccbr_ubuntu_base_20.04:v6.1'
-        logan = 'docker://dnousome/ccbr_logan_base:v0.3.0' 
+        logan = 'docker://dnousome/ccbr_logan_base:v0.3.3'
+        vcf2maf = 'docker://dnousome/ccbr_vcf2maf:v102.0.0'
+        octopus = 'docker://dancooke/octopus:latest'
+
     }
 }
diff --git a/conf/interactive.config b/conf/interactive.config
index 725d1ae..9808cb5 100644
--- a/conf/interactive.config
+++ b/conf/interactive.config
@@ -4,7 +4,7 @@ params {
     max_cpus = 56
     max_time = '12 h'
 
-    
+
 }
 process.scratch = false
 
@@ -14,4 +14,6 @@ singularity {
     autoMounts = true
     cacheDir = "/data/CCBR_Pipeliner/SIFS"
     envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH'
-}
\ No newline at end of file
+}
+
+env.SINGULARITY_CACHEDIR = "/data/CCBR_Pipeliner/SIFS"
diff --git a/conf/modules.config b/conf/modules.config
index 185e4ef..a5ff333 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -9,7 +9,7 @@ process {
     errorStrategy = 'finish'
 
     withName:'sequenza' {
-        container = 'dnousome/ccbr_logan_base:v0.3.0'
+        container = 'dnousome/ccbr_logan_base:v0.3.3'
         publishDir = [
             path: { "${params.outdir}/cnv/sequenza" },
             mode: 'copy'
@@ -22,12 +22,12 @@ process {
             path: { "${params.outdir}/cnv/freec_paired" },
             mode: 'copy'
         ]
-        container = 'dnousome/ccbr_logan_base:v0.3.0' 
+        container = 'dnousome/ccbr_logan_base:v0.3.3'
 
     }
 
     withName:'freec' {
-        container = 'dnousome/ccbr_logan_base:v0.3.0' 
+        container = 'dnousome/ccbr_logan_base:v0.3.3'
         publishDir = [
             path: { "${params.outdir}/cnv/freec_unpaired" },
             mode: 'copy'
@@ -192,7 +192,7 @@ process {
             mode: 'copy'
         ]
     }
-      
+
     withName: 'annotsv_tn' {
         publishDir = [
             path: { "${params.outdir}/SV/annotated" },
@@ -291,5 +291,5 @@ process {
             path: { "${params.outdir}/vcfs/combined" },
             mode: 'copy'
         ]
-    }   
+    }
 }
diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf
index be691a9..e5e0691 100644
--- a/modules/local/copynumber.nf
+++ b/modules/local/copynumber.nf
@@ -21,12 +21,13 @@ ENSEMBLCACHE='/data/SCLC-BRAINMETS/cn/common/ensembl_data'
 DRIVERS='/data/SCLC-BRAINMETS/cn/common/DriverGenePanel.38.tsv'
 HOTSPOTS='/data/SCLC-BRAINMETS/cn/variants/KnownHotspots.somatic.38.vcf.gz'
 
-//DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) 
+//DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS)
 //ascatR=
 
 
-//mm10 Paired-Sequenza, FREEC-tumor only 
+//mm10 Paired-Sequenza, FREEC-tumor only
 process seqz_sequenza_bychr {
+    container = "${params.containers.logan}"
     label 'process_low'
 
     input:
@@ -57,13 +58,15 @@ process seqz_sequenza_bychr {
 
 
 process sequenza {
+    container = "${params.containers.logan}"
+
     label 'process_highcpu'
 
     input:
         tuple val(pairid), path(seqz)
 
     output:
-        tuple val(pairid), 
+        tuple val(pairid),
         path("${pairid}_alternative_solutions.txt"),
         path("${pairid}_alternative_fit.pdf"),
         path("${pairid}_model_fit.pdf"),
@@ -83,9 +86,9 @@ process sequenza {
     //samtools mpileup ${normal} -f $GENOMEREF -Q 20 |gzip > ${normalname}.mpileup.gz
     //sequenza-utils seqz_binning --seqz --window 50 -o ${sample}_bin50.seqz.gz
 
-    shell: 
+    shell:
     '''
-    
+
     zcat !{seqz} | awk '{if (NR==1) {print $0} else {if ($1!="chromosome"){print $0}}}' |\
     sequenza-utils seqz_binning \
         -w 100 \
@@ -99,11 +102,11 @@ process sequenza {
 
     '''
 
-    stub: 
-    
+    stub:
+
     """
-    touch "${pairid}_alternative_solutions.txt" 
-    touch "${pairid}_alternative_fit.pdf" 
+    touch "${pairid}_alternative_solutions.txt"
+    touch "${pairid}_alternative_fit.pdf"
     touch "${pairid}_model_fit.pdf"
     touch "${pairid}_confints_CP.txt"
     touch "${pairid}_CN_bars.pdf"
@@ -123,6 +126,8 @@ process sequenza {
 
 
 process freec_paired {
+    container = "${params.containers.logan}"
+
     label 'process_highcpu'
     publishDir("${outdir}/cnv/freec_paired", mode: 'copy')
 
@@ -188,6 +193,8 @@ process freec_paired {
 
 
 process freec {
+    container = "${params.containers.logan}"
+
     label 'process_mid'
     publishDir("${outdir}/cnv/freec_unpaired", mode: 'copy')
 
@@ -251,11 +258,13 @@ process freec {
 
 
 process amber_tonly {
+    container = "${params.containers.logan}"
+
     label 'process_mid'
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai)
-       
+
 
     output:
         tuple val(tumorname), path("${tumorname}_amber")
@@ -281,13 +290,15 @@ process amber_tonly {
 
     """
     mkdir ${tumorname}_amber
-    touch ${tumorname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_amber/${tumorname}.amber.qc 
+    touch ${tumorname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_amber/${tumorname}.amber.qc
     """
 }
 
 process amber_tn {
+    container = "${params.containers.logan}"
+
     label 'process_mid'
-    
+
     input:
         tuple val(tumorname), path(tumor), path(tumorbai),
         val(normalname), path(normal), path(normalbai)
@@ -317,11 +328,13 @@ process amber_tn {
 
     """
     mkdir ${tumorname}_vs_${normalname}_amber
-    touch ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.qc 
+    touch ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.qc
     """
 }
 
 process cobalt_tonly {
+    container = "${params.containers.logan}"
+
     label "process_mid"
 
     input:
@@ -329,7 +342,7 @@ process cobalt_tonly {
 
     output:
         tuple val(tumorname), path("${tumorname}_cobalt")
-        //path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"), 
+        //path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"),
         //path("${samplename}/${samplename}.cobalt.ratio.pcf"),
         //path("${samplename}/${samplename}.cobalt.gc.median.tsv")
 
@@ -355,6 +368,8 @@ process cobalt_tonly {
 }
 
 process cobalt_tn {
+    container = "${params.containers.logan}"
+
     label "process_mid"
 
     input:
@@ -363,7 +378,7 @@ process cobalt_tn {
 
     output:
         tuple val(tumorname), path("${tumorname}_vs_${normalname}_cobalt")
-        //path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"), 
+        //path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"),
         //path("${samplename}/${samplename}.cobalt.ratio.pcf"),
         //path("${samplename}/${samplename}.cobalt.gc.median.tsv")
 
@@ -391,12 +406,14 @@ process cobalt_tn {
 
 
 process purple {
+    container = "${params.containers.logan}"
+
     label 'process_mid'
     publishDir("${outdir}/cnv/purple", mode: 'copy')
 
     input:
         tuple val(tumorname),
-        path(cobaltin), 
+        path(cobaltin),
         path(amberin),
         path(somaticvcf),
         path(somaticvcfindex)
@@ -474,4 +491,3 @@ process ascat_tn {
 }
 
 */
-
diff --git a/modules/local/germline.nf b/modules/local/germline.nf
index c106683..285a0f3 100644
--- a/modules/local/germline.nf
+++ b/modules/local/germline.nf
@@ -5,15 +5,15 @@ MODEL="/opt/models/wgs/model.ckpt"
 //Processes
 //Deep Variant
 process deepvariant_step1 {
-    
+
     input:
         tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path(bed)
-    
+
     output:
-        tuple val(samplename), path("outputshard/${samplename}.tfrecord_${bed}.gz"), 
+        tuple val(samplename), path("outputshard/${samplename}.tfrecord_${bed}.gz"),
         path("gvcf/${samplename}.gvcf.tfrecord_${bed}.gz")
 
-    script:     
+    script:
     """
     mkdir -p outputshard
     mkdir -p gvcf
@@ -24,7 +24,7 @@ process deepvariant_step1 {
     --reads ${samplename}.bam \
     --channels insert_size \
     --examples outputshard/${samplename}.tfrecord_${bed}.gz \
-    --gvcf gvcf/${samplename}.gvcf.tfrecord_${bed}.gz 
+    --gvcf gvcf/${samplename}.gvcf.tfrecord_${bed}.gz
     """
 
     stub:
@@ -32,23 +32,22 @@ process deepvariant_step1 {
     mkdir -p outputshard
     mkdir -p gvcf
     touch outputshard/${samplename}.tfrecord_${bed}.gz
-    touch gvcf/${samplename}.gvcf.tfrecord_${bed}.gz 
+    touch gvcf/${samplename}.gvcf.tfrecord_${bed}.gz
     """
 
 }
 
 //Step 2 requires GPU
 process deepvariant_step2 {
-    
-    
+
     input:
         tuple val(samplename), path(tfrecords), path(tfgvcf)
-    
+
     output:
-        tuple val(samplename), path(tfrecords), 
+        tuple val(samplename), path(tfrecords),
         path("${samplename}_call_variants_output.tfrecord.gz"), path(tfgvcf)
 
-    script: 
+    script:
 
     """
     call_variants \
@@ -69,17 +68,16 @@ process deepvariant_step2 {
 //Step 3 DV
 process deepvariant_step3 {
 
-    
     input:
         tuple val(samplename), path(tfrecords), path("${samplename}_call_variants_output.tfrecord.gz"),
         path(tfgvcf)
-    
+
     output:
         tuple val(samplename), path("${samplename}.vcf.gz"), path("${samplename}.vcf.gz.tbi"),
         path("${samplename}.gvcf.gz"), path("${samplename}.gvcf.gz.tbi")
 
 
-    script: 
+    script:
     """
    postprocess_variants \
     --ref $GENOMEREF \
@@ -101,16 +99,15 @@ process deepvariant_step3 {
 //Combined DeepVariant
 process deepvariant_combined {
 
-
     input:
         tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai")
-    
+
     output:
         tuple val(samplename), path("${samplename}.gvcf.gz"), path("${samplename}.gvcf.gz.tbi"),
         path("${samplename}.vcf.gz"), path("${samplename}.vcf.gz.tbi")
 
 
-    script:     
+    script:
     """
     run_deepvariant \
         --model_type=WGS \
@@ -118,7 +115,7 @@ process deepvariant_combined {
         --reads=${samplename}.bam \
         --output_gvcf= ${samplename}.gvcf.gz \
         --output_vcf=${samplename}.vcf.gz \
-        --num_shards=16 
+        --num_shards=16
     """
 
 
@@ -126,7 +123,7 @@ process deepvariant_combined {
     """
     touch ${samplename}.vcf.gz ${samplename}.vcf.gz.tbi
     touch ${samplename}.gvcf.gz  ${samplename}.gvcf.gz.tbi
-    
+
     """
 
 
@@ -134,20 +131,19 @@ process deepvariant_combined {
 
 process glnexus {
 
- 
     input:
         path(gvcfs)
-    
+
     output:
-        tuple path("germline.v.bcf"), 
+        tuple path("germline.v.bcf"),
         path("germline.norm.vcf.gz"),path("germline.norm.vcf.gz.tbi")
 
-    script: 
+    script:
 
     """
     glnexus_cli --config DeepVariant_unfiltered \
     *.gvcf.gz --threads 8 > germline.v.bcf
-    
+
     bcftools norm \
         -m - \
         -Oz \
@@ -160,18 +156,13 @@ process glnexus {
         -f -t \
         --threads 8 \
         germline.norm.vcf.gz
-    
+
     """
 
     stub:
     """
         touch germline.v.bcf
-        touch germline.norm.vcf.gz 
+        touch germline.norm.vcf.gz
         touch germline.norm.vcf.gz.tbi
     """
 }
-
-
-
-
-
diff --git a/modules/local/qc.nf b/modules/local/qc.nf
index 82bcc1a..04b8022 100644
--- a/modules/local/qc.nf
+++ b/modules/local/qc.nf
@@ -13,19 +13,21 @@ ANCESTRY_DB=file(params.genomes[params.genome].somalier_ancestrydb)
 SCRIPT_PATH_GENDER = file(params.script_genderPrediction)
 SCRIPT_PATH_SAMPLES = file(params.script_combineSamples)
 SCRIPT_PATH_PCA = file(params.script_ancestry)
-    
 
-//OUTPUT DIRECTORY 
+
+//OUTPUT DIRECTORY
 process fc_lane {
+    container = "${params.containers.logan}"
+
     label 'process_low'
 
     input:
         tuple val(samplename), path(fqs)
 
-    output: 
+    output:
         tuple val(samplename),
-        path("${samplename}.fastq.info.txt")    
-    
+        path("${samplename}.fastq.info.txt")
+
     script:
     GET_FLOWCELL_LANES=file(params.get_flowcell_lanes)
 
@@ -35,7 +37,7 @@ process fc_lane {
     ${samplename} > ${samplename}.fastq.info.txt
     """
 
-    stub: 
+    stub:
     """
     touch ${samplename}.fastq.info.txt
     """
@@ -61,7 +63,7 @@ process fastq_screen {
         path("${samplename}.R2.trimmed_screen.png"),
         path("${samplename}.R2.trimmed_screen.txt")
 
-    script: 
+    script:
         FASTQ_SCREEN_CONF=file(params.fastq_screen_conf)
 
         """
@@ -75,7 +77,7 @@ process fastq_screen {
 
         """
 
-    stub: 
+    stub:
     """
     touch ${samplename}.R1.trimmed_screen.html ${samplename}.R1.trimmed_screen.png
     touch ${samplename}.R1.trimmed_screen.txt ${samplename}.R2.trimmed_screen.html
@@ -92,11 +94,11 @@ process kraken {
     @Input:
         Trimmed FastQ files (scatter)
     @Output:
-        Kraken logfile and interative krona report
+        Kraken logfile and interactive krona report
     */
-    
+
     input:
-        tuple val(samplename), 
+        tuple val(samplename),
         path(fqs)
 
     output:
@@ -104,19 +106,19 @@ process kraken {
         //path("${samplename}.trimmed.kraken_bacteria.out.txt"),
         path("${samplename}.trimmed.kraken_bacteria.taxa.txt"),
         path("${samplename}.trimmed.kraken_bacteria.krona.html")
-        
 
-    script: 
+
+    script:
     """
     #Setups temporary directory for
-    #intermediate files with built-in 
+    #intermediate files with built-in
     #mechanism for deletion on exit
-    
-    
+
+
     # Copy kraken2 db to local node storage to reduce filesystem strain
     cp -rv $BACDB .
     kdb_base=\$(basename $BACDB)
-    
+
     kraken2 --db $BACDB \
         --threads 16 --report ${samplename}.trimmed.kraken_bacteria.taxa.txt \
         --output - \
@@ -127,7 +129,7 @@ process kraken {
         ktImportTaxonomy - -o ${samplename}.trimmed.kraken_bacteria.krona.html
     """
 
-    stub: 
+    stub:
     """
     touch  ${samplename}.trimmed.kraken_bacteria.taxa.txt ${samplename}.trimmed.kraken_bacteria.krona.html
     """
@@ -155,18 +157,18 @@ process fastqc {
     //threads: 8
     //module=['fastqc/0.11.9']
 
-    script: 
+    script:
     """
     mkdir -p fastqc
     fastqc -t 8 \
         -f bam \
         -o fastqc \
-        ${samplename}.bqsr.bam 
+        ${samplename}.bqsr.bam
     mv fastqc/${samplename}.bqsr_fastqc.html ${samplename}_fastqc.html
     mv fastqc/${samplename}.bqsr_fastqc.zip ${samplename}_fastqc.zip
     """
 
-    stub: 
+    stub:
     """
     touch  ${samplename}_fastqc.html ${samplename}_fastqc.zip
     """
@@ -174,7 +176,7 @@ process fastqc {
 
 process qualimap_bamqc {
     /*
-    Quality-control step to assess various post-alignment metrics 
+    Quality-control step to assess various post-alignment metrics
     and a secondary method to calculate insert size. Please see
     QualiMap's website for more information about BAM QC:
     http://qualimap.conesalab.org/
@@ -182,15 +184,15 @@ process qualimap_bamqc {
         Recalibrated BAM file (scatter)
     @Output:
         Report containing post-aligment quality-control metrics
-    */    
+    */
 
     input:
         tuple val(samplename), path(bam), path(bai)
 
-    output: 
+    output:
         tuple path("${samplename}_genome_results.txt"), path("${samplename}_qualimapReport.html")
 
-    script: 
+    script:
     """
     unset DISPLAY
     qualimap bamqc -bam ${bam} \
@@ -215,9 +217,9 @@ process qualimap_bamqc {
 
 process samtools_flagstats {
     /*
-    Quality-control step to assess alignment quality. Flagstat provides 
-    counts for each of 13 categories based primarily on bit flags in the 
-    FLAG field. Information on the meaning of the flags is given in the 
+    Quality-control step to assess alignment quality. Flagstat provides
+    counts for each of 13 categories based primarily on bit flags in the
+    FLAG field. Information on the meaning of the flags is given in the
     SAM specification: https://samtools.github.io/hts-specs/SAMv1.pdf
     @Input:
         Recalibrated BAM file (scatter)
@@ -225,21 +227,21 @@ process samtools_flagstats {
         Text file containing alignment statistics
     */
     label 'process_mid'
-    
+
     input:
         tuple val(samplename), path(bam), path(bai)
-    
+
     output:
         path("${samplename}.samtools_flagstat.txt")
 
-    script: 
+    script:
     """
     samtools flagstat ${bam} > ${samplename}.samtools_flagstat.txt
     """
 
     stub:
     """
-    touch ${samplename}.samtools_flagstat.txt    
+    touch ${samplename}.samtools_flagstat.txt
     """
 }
 
@@ -260,7 +262,7 @@ process mosdepth {
     */
     input:
         tuple val(samplename), path(bam), path(bai)
-    
+
     output:
         path("${samplename}.mosdepth.region.dist.txt"),
         path("${samplename}.mosdepth.summary.txt"),
@@ -268,7 +270,7 @@ process mosdepth {
         path("${samplename}.regions.bed.gz.csi")
 
 
-    script: 
+    script:
     """
     mosdepth -n --fast-mode --by 500  ${samplename} ${bam} -t $task.cpus
     """
@@ -282,12 +284,12 @@ process mosdepth {
     """
 }
 
-process vcftools {    
+process vcftools {
     /*
-    Quality-control step to calculates a measure of heterozygosity on 
+    Quality-control step to calculate a measure of heterozygosity on
     a per-individual basis. The inbreeding coefficient, F, is estimated
     for each individual using a method of moments. Please see VCFtools
-    documentation for more information: 
+    documentation for more information:
     https://vcftools.github.io/man_latest.html
     @Input:
         Multi-sample gVCF file (indirect-gather-due-to-aggregation)
@@ -296,14 +298,14 @@ process vcftools {
     */
     label 'process_mid'
 
-    
-    input: 
+
+    input:
         tuple path(germlinevcf),path(germlinetbi)
-    output: 
+    output:
        path("variants_raw_variants.het")
-    
-    
-    script: 
+
+
+    script:
     """
     vcftools --gzvcf ${germlinevcf} --het --out variants_raw_variants
     """
@@ -323,16 +325,16 @@ process collectvariantcallmetrics {
     @Input:
         Multi-sample gVCF file (indirect-gather-due-to-aggregation)
     @Output:
-        Text file containing a collection of metrics relating to snps and indels 
-    */    
-    input: 
+        Text file containing a collection of metrics relating to snps and indels
+    */
+    input:
         tuple path(germlinevcf),path(germlinetbi)
-    
-    output: 
+
+    output:
         tuple path("raw_variants.variant_calling_detail_metrics"),
         path("raw_variants.variant_calling_summary_metrics")
 
-       
+
     script:
     """
     java -Xmx24g -jar \${PICARDJARPATH}/picard.jar \
@@ -341,7 +343,7 @@ process collectvariantcallmetrics {
         OUTPUT= "raw_variants" \
         DBSNP=$DBSNP Validation_Stringency=SILENT
     """
-    
+
     stub:
     """
     touch raw_variants.variant_calling_detail_metrics raw_variants.variant_calling_summary_metrics
@@ -354,9 +356,9 @@ process bcftools_stats {
     /*
     Quality-control step to collect summary statistics from bcftools stats.
     When bcftools stats is run with one VCF file then stats by non-reference
-    allele frequency, depth distribution, stats by quality and per-sample 
-    counts, singleton statsistics are calculated. Please see bcftools' 
-    documentation for more information: 
+    allele frequency, depth distribution, stats by quality and per-sample
+    counts, singleton statistics are calculated. Please see bcftools'
+    documentation for more information:
     http://samtools.github.io/bcftools/bcftools.html#stats
     @Input:
         Per sample gVCF file (scatter)
@@ -370,8 +372,8 @@ process bcftools_stats {
         tuple val(samplename),  path("${samplename}.gvcf.gz"),path("${samplename}.gvcf.gz.tbi")
     output:
         path("${samplename}.germline.bcftools_stats.txt")
-    
-    script: 
+
+    script:
     """
     bcftools stats ${samplename}.gvcf.gz > ${samplename}.germline.bcftools_stats.txt
     """
@@ -385,11 +387,11 @@ process bcftools_stats {
 
 process gatk_varianteval {
     /*
-    Quality-control step to calculate various quality control metrics from a 
-    variant callset. These metrics include the number of raw or filtered SNP 
+    Quality-control step to calculate various quality control metrics from a
+    variant callset. These metrics include the number of raw or filtered SNP
     counts; ratio of transition mutations to transversions; concordance of a
     particular sample's calls to a genotyping chip; number of s per sample.
-    Please see GATK's documentation for more information: 
+    Please see GATK's documentation for more information:
     https://gatk.broadinstitute.org/hc/en-us/articles/360040507171-VariantEval
     @Input:
         Per sample gVCF file (scatter)
@@ -398,9 +400,9 @@ process gatk_varianteval {
     */
     label 'process_mid'
 
-    input: 
+    input:
         tuple val(samplename), path("${samplename}.gvcf.gz") ,path("${samplename}.gvcf.gz.tbi")
-    output: 
+    output:
         path("${samplename}.germline.eval.grp")
     //params:
      //   rname    = "vareval",
@@ -410,7 +412,7 @@ process gatk_varianteval {
     //message: "Running GATK4 VariantEval on '{input.vcf}' input file"
     //container: config['images']['wes_base']
     //threads: 16
-    script: 
+    script:
     """
     gatk --java-options '-Xmx12g -XX:ParallelGCThreads=16' VariantEval \
         -R $GENOMEREF \
@@ -431,7 +433,7 @@ process snpeff {
     /*
     Data processing and quality-control step to annotate variants, predict its
     functional effects, and collect various summary statistics about variants and
-    their annotations. Please see SnpEff's documentation for more information: 
+    their annotations. Please see SnpEff's documentation for more information:
     https://pcingola.github.io/SnpEff/
     @Input:
         Per sample gVCF file (scatter)
@@ -440,14 +442,14 @@ process snpeff {
     */
     label 'process_mid'
 
-    input:  
+    input:
         tuple val(samplename), path("${samplename}.gvcf.gz"), path("${samplename}.gvcf.gz.tbi")
-    output: 
+    output:
         tuple path("${samplename}.germline.snpeff.ann.vcf"),
         path("${samplename}.germline.snpeff.ann.csv"),
         path("${samplename}.germline.snpeff.ann.html")
 
-    script: 
+    script:
     """
         java -Xmx12g -jar \$SNPEFF_JAR \
         -v -canon -c $SNPEFF_CONFIG \
@@ -478,15 +480,15 @@ process somalier_extract {
 
     input:
         tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai")
-    output: 
+    output:
         path("output/${samplename}.somalier")
     //params:
     //    sites_vcf = config['references']['SOMALIER']['SITES_VCF'],
     //    genomeFasta = config['references']['GENOME'],
     //    rname = 'somalier_extract'
     //container: config['images']['wes_base']
-    script: 
-    """ 
+    script:
+    """
     mkdir -p output
     somalier extract \
         -d output \
@@ -498,7 +500,7 @@ process somalier_extract {
     stub:
     """
     mkdir -p output
-    touch output/${samplename}.somalier 
+    touch output/${samplename}.somalier
     """
 }
 
@@ -518,21 +520,21 @@ process somalier_analysis_human {
 
     input:
         path(somalierin)
-    
+
     output:
         tuple path("relatedness.pairs.tsv"), path("relatedness.samples.tsv"),
         path("ancestry.somalier-ancestry.tsv"), path("predicted.genders.tsv"),
         path("predicted.pairs.tsv"),
         path("sampleAncestryPCAPlot.html"),
         path("predictedPairsAncestry.pdf")
-    
+
     script:
-    """ 
+    """
     echo "Estimating relatedness"
     somalier relate \
         -o "relatedness" \
         $somalierin
-    
+
     echo "Estimating ancestry"
     somalier ancestry \
         -o "ancestry" \
@@ -542,19 +544,19 @@ process somalier_analysis_human {
 
     Rscript $SCRIPT_PATH_GENDER \
         relatedness.samples.tsv \
-        predicted.genders.tsv    
-    
+        predicted.genders.tsv
+
     Rscript $SCRIPT_PATH_SAMPLES \
         relatedness.pairs.tsv \
         predicted.pairs.tsv
-    
+
     Rscript $SCRIPT_PATH_PCA \
         ancestry.somalier-ancestry.tsv \
         predicted.pairs.tsv \
         sampleAncestryPCAPlot.html \
         predictedPairsAncestry.pdf
     """
-    
+
     stub:
 
     """
@@ -581,30 +583,30 @@ process somalier_analysis_mouse {
 
     input:
         path(somalierin)
-    
+
     output:
-        tuple path("relatedness.pairs.tsv"), 
+        tuple path("relatedness.pairs.tsv"),
         path("relatedness.samples.tsv"),
         path("predicted.genders.tsv"),
         path("predicted.pairs.tsv")
-    
+
     script:
-    """ 
+    """
     echo "Estimating relatedness"
     somalier relate \
         -o "relatedness" \
         $somalierin
-    
+
     Rscript $SCRIPT_PATH_GENDER \
         relatedness.samples.tsv \
-        predicted.genders.tsv    
-    
+        predicted.genders.tsv
+
     Rscript $SCRIPT_PATH_SAMPLES \
         relatedness.pairs.tsv \
         predicted.pairs.tsv
-    
+
     """
-    
+
     stub:
 
     """
@@ -612,7 +614,7 @@ process somalier_analysis_mouse {
     touch relatedness.samples.tsv
     touch predicted.genders.tsv
     touch predicted.pairs.tsv
-    
+
     """
 }
 
@@ -620,23 +622,23 @@ process multiqc {
 
     """
     Reporting step to aggregate sample summary statistics and quality-control
-    information across all samples. This will be one of the last steps of the 
-    pipeline. The inputs listed here are to ensure that this step runs last. 
-    During runtime, MultiQC will recurively crawl through the working directory
+    information across all samples. This will be one of the last steps of the
+    pipeline. The inputs listed here are to ensure that this step runs last.
+    During runtime, MultiQC will recursively crawl through the working directory
     and parse files that it supports.
     @Input:
         List of files to ensure this step runs last (gather)
     @Output:
         Interactive MulitQC report and a QC metadata table
     """
-    
-    input:  
+
+    input:
         path(allqcin)
 
-    output: 
+    output:
         path("MultiQC_Report.html")
 
-    script: 
+    script:
 
     """
     multiqc . \
diff --git a/modules/local/trim_align.nf b/modules/local/trim_align.nf
index 4fa34db..8a69287 100644
--- a/modules/local/trim_align.nf
+++ b/modules/local/trim_align.nf
@@ -3,12 +3,13 @@ KNOWNRECAL = params.genomes[params.genome].KNOWNRECAL
 
 
 process fastp {
+    container = "${params.containers.logan}"
     label 'process_mid'
     tag { name }
 
     input:
     tuple val(samplename), path(fqs)
-    
+
     output:
     tuple val(samplename),
     path("${samplename}.R1.trimmed.fastq.gz"),
@@ -40,15 +41,16 @@ process fastp {
 
 
 process bwamem2 {
+    container = "${params.containers.logan}"
     tag { name }
-    
+
     input:
-        tuple val(samplename), 
+        tuple val(samplename),
         path("${samplename}.R1.trimmed.fastq.gz"),
         path("${samplename}.R2.trimmed.fastq.gz"),
         path("${samplename}.fastp.json"),
         path("${samplename}.fastp.html")
-        
+
     output:
         tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai")
 
@@ -77,9 +79,9 @@ process bwamem2 {
 
 process bqsr {
     /*
-    Base quality recalibration for all samples 
-    */    
-
+    Base quality recalibration for all samples
+    */
+    container = "${params.containers.logan}"
     label 'process_low'
     input:
         tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path(bed)
@@ -99,19 +101,20 @@ process bqsr {
 
     stub:
     """
-    touch ${samplename}_${bed.simpleName}.recal_data.grp 
+    touch ${samplename}_${bed.simpleName}.recal_data.grp
     """
 
 }
 
 process gatherbqsr {
+    container = "${params.containers.logan}"
     label 'process_low'
-    input: 
+    input:
         tuple val(samplename), path(recalgroups)
     output:
         tuple val(samplename), path("${samplename}.recal_data.grp")
     script:
-    
+
     strin = recalgroups.join(" --input ")
 
     """
@@ -131,10 +134,11 @@ process gatherbqsr {
 
 process applybqsr {
     /*
-    Base quality recalibration for all samples to 
-    */   
+    Apply base quality recalibration to all samples
+    */
+    container = "${params.containers.logan}"
     label 'process_low'
-    
+
     input:
         tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path("${samplename}.recal_data.grp")
 
@@ -154,7 +158,7 @@ process applybqsr {
     """
 
     stub:
-    
+
     """
     touch ${samplename}.bqsr.bam ${samplename}.bqsr.bai
     """
@@ -164,12 +168,13 @@ process applybqsr {
 
 
 process samtoolsindex {
+    container = "${params.containers.logan}"
     label 'process_mid'
-    publishDir(path: "${outdir}/bams/BQSR", mode: 'copy') 
-    
+    publishDir(path: "${outdir}/bams/BQSR", mode: 'copy')
+
     input:
     tuple val(bamname), path(bam)
-    
+
     output:
     tuple val(bamname), path(bam), path("${bam}.bai")
 
@@ -187,9 +192,10 @@ process samtoolsindex {
 
 //Save to CRAM for output
 process bamtocram_tonly {
+    container = "${params.containers.logan}"
     label 'process_mid'
-    
-    input: 
+
+    input:
         tuple val(tumorname), path(tumor), path(tumorbai)
 
     output:
@@ -204,27 +210,27 @@ process bamtocram_tonly {
 
 /*
 process indelrealign {
-    //Briefly, RealignerTargetCreator runs faster with increasing -nt threads, 
+    //Briefly, RealignerTargetCreator runs faster with increasing -nt threads,
     //while IndelRealigner shows diminishing returns for increasing scatter
-    
+
     tag { name }
-    
+
     input:
     tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai")
 
     output:
     tuple val(samplename), path("${samplename}.ir.bam")
 
-    script: 
-    
+    script:
+
     """
     /usr/bin/java -Xmx32g -jar \${GATK_JAR} -T RealignerTargetCreator \
         -I ${samplename}.bam \
         -R ${GENOMEREF} \
         -o ${samplename}.intervals \
         -nt 16 \
-        -known ${MILLSINDEL} -known ${SHAPEITINDEL} 
-    
+        -known ${MILLSINDEL} -known ${SHAPEITINDEL}
+
     /usr/bin/java -Xmx32g -jar \${GATK_JAR} -T IndelRealigner \
         -R ${GENOMEREF} \
         -I ${samplename}.bam \
@@ -234,11 +240,11 @@ process indelrealign {
         -targetIntervals ${samplename}.intervals \
         -o  ${samplename}.ir.bam
     """
-    
+
 
     stub:
     """
-    touch ${samplename}.ir.bam 
+    touch ${samplename}.ir.bam
     """
 
 }
diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf
index e23cd0e..c580c2b 100644
--- a/modules/local/variant_calling.nf
+++ b/modules/local/variant_calling.nf
@@ -1,10 +1,10 @@
 GENOMEREF=file(params.genomes[params.genome].genome)
 GENOMEFAI=file(params.genomes[params.genome].genomefai)
 GENOMEDICT=file(params.genomes[params.genome].genomedict)
-KGPGERMLINE=params.genomes[params.genome].kgp 
-DBSNP=file(params.genomes[params.genome].dbsnp) 
-GNOMADGERMLINE=params.genomes[params.genome].gnomad 
-PON=file(params.genomes[params.genome].pon) 
+KGPGERMLINE=params.genomes[params.genome].kgp
+DBSNP=file(params.genomes[params.genome].dbsnp)
+GNOMADGERMLINE=params.genomes[params.genome].gnomad
+PON=file(params.genomes[params.genome].pon)
 VEPCACHEDIR=file(params.genomes[params.genome].vepcache)
 VEPSPECIES=params.genomes[params.genome].vepspecies
 VEPBUILD=params.genomes[params.genome].vepbuild
@@ -15,20 +15,21 @@ LOFREQ_CONVERT=params.lofreq_convert
 
 
 process mutect2 {
+    container = "${params.containers.logan}"
     label 'process_somaticcaller'
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai),
-        val(normalname), path(normal), path(normalbai), 
+        val(normalname), path(normal), path(normalbai),
         path(bed)
-    
+
     output:
         tuple val(tumorname), val(normalname),
         path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz"),
         path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.f1r2.tar.gz"),
         path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz.stats")
 
-    
+
     script:
     """
     gatk Mutect2 \
@@ -55,12 +56,13 @@ process mutect2 {
 
 
 process pileup_paired_t {
+    container = "${params.containers.logan}"
     label 'process_highmem'
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai),
         val(normalname), path(normal), path(normalbai), path(bed)
-    
+
     output:
         tuple val(tumorname),
         path("${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table")
@@ -71,7 +73,7 @@ process pileup_paired_t {
         -I ${tumor} \
         -V $KGPGERMLINE \
         -L ${bed} \
-        -O ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table 
+        -O ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table
 
     """
 
@@ -84,12 +86,13 @@ process pileup_paired_t {
 
 
 process pileup_paired_n {
+    container = "${params.containers.logan}"
     label 'process_highmem'
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai),
         val(normalname), path(normal), path(normalbai), path(bed)
-    
+
     output:
         tuple val(tumorname),
         path("${tumor.simpleName}_${bed.simpleName}.normal.pileup.table")
@@ -100,10 +103,10 @@ process pileup_paired_n {
         -I ${normal} \
         -V $KGPGERMLINE \
         -L ${bed} \
-        -O ${tumor.simpleName}_${bed.simpleName}.normal.pileup.table 
+        -O ${tumor.simpleName}_${bed.simpleName}.normal.pileup.table
 
     """
-    
+
     stub:
     """
     touch ${tumor.simpleName}_${bed.simpleName}.normal.pileup.table
@@ -113,13 +116,14 @@ process pileup_paired_n {
 
 
 process contamination_paired {
+    container = "${params.containers.logan}"
     label 'process_highmem'
 
     input:
         tuple val(tumorname),
         path(tumor_pileups),
         path(normal_pileups)
-    
+
     output:
         tuple val(tumorname),
         path("${tumorname}_allpileups.table"),
@@ -137,7 +141,7 @@ process contamination_paired {
     gatk GatherPileupSummaries \
     --sequence-dictionary $GENOMEDICT \
     -I ${alltumor} -O ${tumorname}_allpileups.table
-    
+
     gatk GatherPileupSummaries \
     --sequence-dictionary $GENOMEDICT \
     -I ${allnormal} -O ${tumorname}_normal.allpileups.table
@@ -161,20 +165,21 @@ process contamination_paired {
     touch ${tumorname}_normal.contamination.table
     """
 
-    
+
 }
 
 
 process learnreadorientationmodel {
+    container = "${params.containers.logan}"
     label 'process_highmem'
 
     input:
         tuple val(sample), path(f1r2)
-      
+
     output:
         tuple val(sample), path("${sample}.read-orientation-model.tar.gz")
 
-    script: 
+    script:
     f1r2in = f1r2.join(" --input ")
 
     """
@@ -191,15 +196,16 @@ process learnreadorientationmodel {
 
 
 process mergemut2stats {
+    container = "${params.containers.logan}"
     label 'process_low'
 
     input:
         tuple val(sample), path(stats)
-      
+
     output:
         tuple val(sample), path("${sample}.final.stats")
 
-    script: 
+    script:
     statsin = stats.join(" --stats ")
 
     """
@@ -217,17 +223,18 @@ process mergemut2stats {
 
 
 process mutect2filter {
+    container = "${params.containers.logan}"
     label 'process_mid'
-        
+
     input:
-        tuple val(tumor), val(normal),path(mutvcfs), path(stats), path(obs), 
+        tuple val(tumor), val(normal),path(mutvcfs), path(stats), path(obs),
         path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination)
-    
+
     output:
-        tuple val("${tumor}_vs_${normal}"), 
-        path("${tumor}_vs_${normal}.mut2.marked.vcf.gz"), 
+        tuple val("${tumor}_vs_${normal}"),
+        path("${tumor}_vs_${normal}.mut2.marked.vcf.gz"),
         path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.tbi"),
-        path("${tumor}_vs_${normal}.mut2.norm.vcf.gz"), path("${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi"), 
+        path("${tumor}_vs_${normal}.mut2.norm.vcf.gz"), path("${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi"),
         path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.filteringStats.tsv")
 
     script:
@@ -247,7 +254,7 @@ process mutect2filter {
         --variant ${tumor}_vs_${normal}.mut2.marked.vcf.gz \
         --exclude-filtered \
         --output ${tumor}_vs_${normal}.mut2.final.vcf.gz
-    
+
     bcftools sort ${tumor}_vs_${normal}.mut2.final.vcf.gz |\
     bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
         awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\
@@ -267,18 +274,19 @@ process mutect2filter {
 
 
 process strelka_tn {
+    container = "${params.containers.logan}"
     label 'process_highcpu'
     input:
-        tuple val(tumorname), path(tumor), path(tumorbai), 
+        tuple val(tumorname), path(tumor), path(tumorbai),
         val(normalname), path(normal), path(normalbai), path(bed)
-    
+
     output:
         tuple val(tumorname), val(normalname),
         path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz"),
         path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi"),
         path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz"),
         path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz.tbi")
-    
+
     script:
 
     """
@@ -296,21 +304,21 @@ process strelka_tn {
     ./wd/runWorkflow.py -m local -j $task.cpus
     mv wd/results/variants/somatic.snvs.vcf.gz  ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.snvs.vcf.gz
     mv wd/results/variants/somatic.indels.vcf.gz  ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.indels.vcf.gz
-    
-    printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" >sampname 
-    
+
+    printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" >sampname
+
     bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.snvs.vcf.gz \
-        | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz 
+        | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz
     bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.indels.vcf.gz \
-        | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz 
+        | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz
 
     bcftools index -t ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz
-    bcftools index -t ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz 
-  
+    bcftools index -t ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz
+
     """
 
     stub:
-    
+
     """
     touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz  ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi
     touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz.tbi
@@ -321,16 +329,17 @@ process strelka_tn {
 
 
 process vardict_tn {
+    container = "${params.containers.logan}"
     label 'process_highcpu'
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed)
-    
+
     output:
         tuple val(tumorname), val(normalname),
         path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz")
-    //bcbio notes of vardict filtering var2vcf_paired.pl -P 0.9 -m 4.25 -f 0.01 -M” and 
-    //filtered with “((AF*DP < 6) && ((MQ < 55.0 && NM > 1.0) || (MQ < 60.0 && NM > 2.0) || (DP < 10) || (QUAL < 45)))” 
+    //bcbio notes on vardict filtering: “var2vcf_paired.pl -P 0.9 -m 4.25 -f 0.01 -M” and
+    //filtered with “((AF*DP < 6) && ((MQ < 55.0 && NM > 1.0) || (MQ < 60.0 && NM > 2.0) || (DP < 10) || (QUAL < 45)))”
     script:
 
     """
@@ -350,8 +359,8 @@ process vardict_tn {
             -S \
             -f 0.05 >  ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf
 
-    printf "${normal.Name}\t${normalname}\n${tumor.Name}\t${tumorname}\n" > sampname 
-    
+    printf "${normal.Name}\t${normalname}\n${tumor.Name}\t${tumorname}\n" > sampname
+
     bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf \
         | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz
 
@@ -359,7 +368,7 @@ process vardict_tn {
     """
 
     stub:
-    
+
     """
     touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz
 
@@ -370,19 +379,20 @@ process vardict_tn {
 
 
 process varscan_tn {
+    container = "${params.containers.logan}"
     label 'process_somaticcaller'
 
     input:
-        tuple val(tumorname), path(tumor), path(tumorbai), 
+        tuple val(tumorname), path(tumor), path(tumorbai),
         val(normalname), path(normal), path(normalbai), path(bed),
         val(tumor1),
-        path(tumorpileup), path(normalpileup), 
+        path(tumorpileup), path(normalpileup),
         path(tumor_con_table), path(normal_con_table)
-    
+
     output:
         tuple val(tumorname), val(normalname),
         path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf.gz")
-    
+
     shell:
     '''
     tumor_purity=$( echo "1-$(printf '%.6f' $(tail -n -1 !{tumor_con_table} | cut -f2 ))" | bc -l)
@@ -402,8 +412,8 @@ process varscan_tn {
     -R !{GENOMEREF} -SD !{GENOMEDICT} \
     -O !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf
 
-    printf "NORMAL\t!{normalname}\nTUMOR\t!{tumorname}\n" > sampname 
-    
+    printf "NORMAL\t!{normalname}\nTUMOR\t!{tumorname}\n" > sampname
+
     bcftools reheader -s sampname !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf \
        | bcftools view -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.gz
 
@@ -418,17 +428,18 @@ process varscan_tn {
 
 
 process octopus_tn {
+    container = "${params.containers.octopus}"
     //label 'process_highcpu' Using separate docker for octopus
 
     input:
-        tuple val(tumorname), path(tumor), path(tumorbai), 
+        tuple val(tumorname), path(tumor), path(tumorbai),
         val(normalname), path(normal), path(normalbai), path(bed)
-    
+
 
     output:
         tuple val("${tumorname}_vs_${normalname}"),
         path("${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz")
-    
+
     script:
 
     """
@@ -443,32 +454,33 @@ process octopus_tn {
     """
 
     stub:
-    
+
     """
     touch "${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz"
     """
 
-} 
+}
 
 
 process lofreq_tn {
-    label 'process_somaticcaller' 
+    container = "${params.containers.logan}"
+    label 'process_somaticcaller'
 
     input:
-        tuple val(tumorname), path(tumor), path(tumorbai), 
+        tuple val(tumorname), path(tumor), path(tumorbai),
         val(normalname), path(normal), path(normalbai), path(bed)
-    
+
 
     output:
-    
-        tuple val(tumorname), val(normalname), 
+
+        tuple val(tumorname), val(normalname),
         path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.snvs.vcf.gz"),
         path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz"),
         path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz"),
         path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz"),
         path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz"),
         path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz.tbi")
-    
+
     script:
 
     """
@@ -478,16 +490,16 @@ process lofreq_tn {
         -l ${bed} \
         --call-indels \
         -o ${tumorname}_vs_${normalname}_${bed.simpleName}_
-    
+
     bcftools concat ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz \
         ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz --threads $task.cpus -Oz -o \
         ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz
 
     $LOFREQ_CONVERT -i ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz -g 1/0 \
         -n ${tumorname} -o ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz
-    
+
     bcftools view -h ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz >temphead
-    
+
     sed 's/^##FORMAT=<ID=DP4,Number=4,Type=Integer,Description="Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">/##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref\\/fwd, ref\\/rev, var\\/fwd, var\\/rev">/' temphead > temphead1
     bcftools reheader ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz -h temphead1 |\
         bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz
@@ -497,71 +509,73 @@ process lofreq_tn {
     """
 
     stub:
-    
+
     """
     touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.snvs.vcf.gz"
     touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz"
     touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz"
     touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz"
     touch "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz" "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz.tbi"
-    
+
     """
-} 
+}
 
 
 
 process muse_tn {
-    label 'process_somaticcaller' 
+    container = "${params.containers.logan}"
+    label 'process_somaticcaller'
     input:
-        tuple val(tumorname), path(tumor), path(tumorbai), 
+        tuple val(tumorname), path(tumor), path(tumorbai),
         val(normalname), path(normal), path(normalbai)
-    
+
 
     output:
         tuple val(tumorname), val(normalname),
         path("${tumorname}_vs_${normalname}.vcf.gz")
-    
+
     script:
 
     """
     MuSE call -f $GENOMEREF -O ${tumorname}_vs_${normalname} -n $task.cpus $tumor $normal
     MuSE sump -I ${tumorname}_vs_${normalname}.MuSE.txt \
         -O ${tumorname}_vs_${normalname}.vcf -n $task.cpus -D $DBSNP -G
-    
+
     bcftools view ${tumorname}_vs_${normalname}.vcf -Oz -o ${tumorname}_vs_${normalname}_temp.vcf.gz
 
-    printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" > sampname 
-    
+    printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" > sampname
+
     bcftools reheader -s sampname ${tumorname}_vs_${normalname}_temp.vcf.gz \
         | bcftools view -Oz -o ${tumorname}_vs_${normalname}.vcf.gz
 
     """
 
     stub:
-    
+
     """
     touch "${tumorname}_vs_${normalname}.vcf.gz"
     """
 
-} 
+}
 
 
 process combineVariants {
+    container = "${params.containers.logan}"
     label 'process_highmem'
 
     input:
         tuple val(sample), path(inputvcf), val(vc)
-    
+
     output:
-        tuple val(sample), 
-        path("${vc}/${sample}.${vc}.marked.vcf.gz"), 
-        path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), 
+        tuple val(sample),
+        path("${vc}/${sample}.${vc}.marked.vcf.gz"),
+        path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"),
         path("${vc}/${sample}.${vc}.norm.vcf.gz"),
         path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi")
 
     script:
     vcfin = inputvcf.join(" -I ")
-    
+
     """
     mkdir ${vc}
     gatk --java-options "-Xmx48g" SortVcf \
@@ -596,21 +610,22 @@ process combineVariants {
 
 
 process combineVariants_alternative {
+    container = "${params.containers.logan}"
     label 'process_highmem'
 
     input:
         tuple val(sample), path(vcfs), path(vcfsindex), val(vc)
-    
+
     output:
-        tuple val(sample), 
-        path("${vc}/${sample}.${vc}.marked.vcf.gz"), 
-        path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), 
+        tuple val(sample),
+        path("${vc}/${sample}.${vc}.marked.vcf.gz"),
+        path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"),
         path("${vc}/${sample}.${vc}.norm.vcf.gz"),
         path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi")
-    
+
     script:
     vcfin = vcfs.join(" ")
-    
+
     """
     mkdir ${vc}
     bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp1.vcf.gz
@@ -636,13 +651,14 @@ process combineVariants_alternative {
     touch ${vc}/${sample}.${vc}.norm.vcf.gz
     touch ${vc}/${sample}.${vc}.marked.vcf.gz.tbi
     touch ${vc}/${sample}.${vc}.norm.vcf.gz.tbi
-    
+
     """
 
 }
 
 
 process bcftools_index_octopus {
+    container = "${params.containers.logan}"
     label 'process_low'
 
     input:
@@ -651,10 +667,10 @@ process bcftools_index_octopus {
 
     output:
         tuple val(tumor),
-        path(vcf), 
+        path(vcf),
         path("${vcf}.tbi")
-    
-    script:    
+
+    script:
     """
     bcftools index -t ${vcf}
     """
@@ -670,32 +686,33 @@ process bcftools_index_octopus {
 
 process combineVariants_strelka {
     //Concat all somatic snvs/indels across all files, strelka separates snv/indels
+    container = "${params.containers.logan}"
     label 'process_mid'
 
     input:
-        tuple val(sample), 
+        tuple val(sample),
         path(strelkasnvs), path(snvindex),
         path(strelkaindels), path(indelindex)
-    
+
     output:
-        tuple val(sample), 
+        tuple val(sample),
         path("${sample}.strelka.vcf.gz"), path("${sample}.strelka.vcf.gz.tbi"),
         path("${sample}.filtered.strelka.vcf.gz"), path("${sample}.filtered.strelka.vcf.gz.tbi")
-    
-    
+
+
     script:
-    
+
     vcfin = strelkasnvs.join(" ")
     indelsin = strelkaindels.join(" ")
 
 
     """
-    bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz -a 
+    bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz -a
     bcftools norm ${sample}.temp.strelka.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
         awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\
         sed '/^\$/d' > ${sample}.temp1.strelka.vcf.gz
 
-    bcftools sort ${sample}.temp1.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz 
+    bcftools sort ${sample}.temp1.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz
 
     bcftools view ${sample}.strelka.vcf.gz --threads $task.cpus -f PASS -Oz -o ${sample}.filtered.strelka.vcf.gz
 
@@ -708,16 +725,17 @@ process combineVariants_strelka {
     """
     touch ${sample}.strelka.vcf.gz ${sample}.strelka.vcf.gz.tbi
     touch ${sample}.filtered.strelka.vcf.gz ${sample}.filtered.strelka.vcf.gz.tbi
-    
+
     """
 
 }
 
 
 process somaticcombine {
+    container = "${params.containers.logan}"
     label 'process_mid'
 
-    input: 
+    input:
         tuple val(tumorsample), val(normal),
         val(callers),
         path(vcfs), path(vcfindex)
@@ -730,7 +748,7 @@ process somaticcombine {
     script:
         vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b }
         vcfin2="-V:" + vcfin1.join(" -V:")
-    
+
     """
     java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \
         -R $GENOMEREF \
@@ -753,10 +771,11 @@ process somaticcombine {
 }
 
 
-process annotvep_tn {    
+process annotvep_tn {
+    container = "${params.containers.vcf2maf}"
     input:
-        tuple val(tumorsample), val(normalsample), 
-        val(vc), path(tumorvcf), path(vcfindex) 
+        tuple val(tumorsample), val(normalsample),
+        val(vc), path(tumorvcf), path(vcfindex)
 
     output:
         path("paired/${vc}/${tumorsample}_vs_${normalsample}.maf")
@@ -771,15 +790,15 @@ process annotvep_tn {
     NORM_VCF_ID_ARG=""
     NSAMPLES=${#VCF_SAMPLE_IDS[@]}
     if [ $NSAMPLES -gt 1 ]; then
-        # Assign tumor, normal IDs 
-        # Look through column names and 
+        # Assign tumor, normal IDs
+        # Look through column names and
         # see if they match provided IDs
         for (( i = 0; i < $NSAMPLES; i++ )); do
             echo "${VCF_SAMPLE_IDS[$i]}"
             if [ "${VCF_SAMPLE_IDS[$i]}" == !{tumorsample} ]; then
                 TID_IDX=$i
             fi
-            
+
             if [ "${VCF_SAMPLE_IDS[$i]}" == !{normalsample} ]; then
                 NID_IDX=$i
             fi
@@ -791,9 +810,9 @@ process annotvep_tn {
         fi
     fi
     VCF_TID=${VCF_SAMPLE_IDS[$TID_IDX]}
-   
+
     zcat !{tumorvcf} > !{tumorvcf.baseName}
-    
+
     mkdir -p paired/!{vc}
 
     vcf2maf.pl \
@@ -817,10 +836,12 @@ process annotvep_tn {
 
 
 process combinemafs_tn {
+    container = "${params.containers.logan}"
+
     label 'process_low'
     publishDir(path: "${outdir}/mafs/paired", mode: 'copy')
 
-    input: 
+    input:
         path(allmafs)
 
     output:
@@ -840,5 +861,3 @@ process combinemafs_tn {
     touch final_tn.maf
     """
 }
-
-
diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf
index 9f8bf93..bdc5731 100644
--- a/modules/local/variant_calling_tonly.nf
+++ b/modules/local/variant_calling_tonly.nf
@@ -1,10 +1,10 @@
 GENOMEREF=file(params.genomes[params.genome].genome)
 GENOMEFAI=file(params.genomes[params.genome].genomefai)
 GENOMEDICT=file(params.genomes[params.genome].genomedict)
-KGPGERMLINE=params.genomes[params.genome].kgp 
-DBSNP=file(params.genomes[params.genome].dbsnp) 
-GNOMADGERMLINE=params.genomes[params.genome].gnomad 
-PON=file(params.genomes[params.genome].pon) 
+KGPGERMLINE=params.genomes[params.genome].kgp
+DBSNP=file(params.genomes[params.genome].dbsnp)
+GNOMADGERMLINE=params.genomes[params.genome].gnomad
+PON=file(params.genomes[params.genome].pon)
 VEPCACHEDIR=file(params.genomes[params.genome].vepcache)
 VEPSPECIES=params.genomes[params.genome].vepspecies
 VEPBUILD=params.genomes[params.genome].vepbuild
@@ -13,11 +13,13 @@ GERMLINE_FOREST=params.genomes[params.genome].octopus_gforest
 
 
 process pileup_paired_tonly {
+    container = "${params.containers.logan}"
+
     label 'process_highmem'
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai), path(bed)
-    
+
     output:
         tuple val(tumorname),
         path("${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table")
@@ -29,7 +31,7 @@ process pileup_paired_tonly {
         -I ${tumor} \
         -V $KGPGERMLINE \
         -L ${bed} \
-        -O ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table 
+        -O ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table
 
     """
 
@@ -43,6 +45,8 @@ process pileup_paired_tonly {
 
 
 process contamination_tumoronly {
+    container = "${params.containers.logan}"
+
     label 'process_highmem'
 
     input:
@@ -63,7 +67,7 @@ process contamination_tumoronly {
     gatk GatherPileupSummaries \
     --sequence-dictionary $GENOMEDICT \
     -I ${alltumor} -O ${tumorname}_allpileups.table
-    
+
     gatk CalculateContamination \
         -I ${tumorname}_allpileups.table \
         -O ${tumorname}.contamination.table
@@ -81,15 +85,17 @@ process contamination_tumoronly {
 
 
 process learnreadorientationmodel_tonly {
+    container = "${params.containers.logan}"
+
     label 'process_highmem'
 
     input:
         tuple val(sample), path(f1r2)
-      
+
     output:
     tuple val(sample), path("${sample}.read-orientation-model.tar.gz")
 
-    script: 
+    script:
     f1r2in = f1r2.join(" --input ")
 
     """
@@ -109,15 +115,17 @@ process learnreadorientationmodel_tonly {
 
 
 process mergemut2stats_tonly {
+    container = "${params.containers.logan}"
+
     label 'process_low'
 
     input:
         tuple val(sample), path(stats)
-      
+
     output:
         tuple val(sample), path("${sample}.final.stats")
 
-    script: 
+    script:
     statsin = stats.join(" --stats ")
 
     """
@@ -136,16 +144,18 @@ process mergemut2stats_tonly {
 
 
 process mutect2_t_tonly {
+    container = "${params.containers.logan}"
+
     label 'process_somaticcaller'
     input:
         tuple val(tumorname), path(tumor), path(tumorbai), path(bed)
-    
+
     output:
         tuple val(tumorname),
         path("${tumor.simpleName}_${bed.simpleName}.tonly.mut2.vcf.gz"),
         path("${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz"),
         path("${tumor.simpleName}_${bed.simpleName}.tonly.mut2.vcf.gz.stats")
-    
+
     script:
 
     """
@@ -158,7 +168,7 @@ process mutect2_t_tonly {
     --panel-of-normals $PON \
     --output ${tumor.simpleName}_${bed.simpleName}.tonly.mut2.vcf.gz \
     --f1r2-tar-gz ${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz \
-    --independent-mates    
+    --independent-mates
     """
 
     stub:
@@ -174,14 +184,16 @@ process mutect2_t_tonly {
 
 
 process mutect2filter_tonly {
+    container = "${params.containers.logan}"
+
     label 'process_mid'
 
     input:
         tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups),path(tumorcontamination)
     output:
-        tuple val(sample), 
-        path("${sample}.tonly.mut2.marked.vcf.gz"),path("${sample}.tonly.mut2.marked.vcf.gz.tbi"), 
-        path("${sample}.tonly.mut2.norm.vcf.gz"),path("${sample}.tonly.mut2.norm.vcf.gz.tbi"), 
+        tuple val(sample),
+        path("${sample}.tonly.mut2.marked.vcf.gz"),path("${sample}.tonly.mut2.marked.vcf.gz.tbi"),
+        path("${sample}.tonly.mut2.norm.vcf.gz"),path("${sample}.tonly.mut2.norm.vcf.gz.tbi"),
         path("${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv")
 
     script:
@@ -190,8 +202,8 @@ process mutect2filter_tonly {
 
 
     """
-    gatk GatherVcfs -I ${mut2in} -O ${sample}.tonly.concat.vcf.gz 
-    gatk IndexFeatureFile -I ${sample}.tonly.concat.vcf.gz 
+    gatk GatherVcfs -I ${mut2in} -O ${sample}.tonly.concat.vcf.gz
+    gatk IndexFeatureFile -I ${sample}.tonly.concat.vcf.gz
     gatk FilterMutectCalls \
         -R $GENOMEREF \
         -V ${sample}.tonly.concat.vcf.gz \
@@ -225,16 +237,18 @@ process mutect2filter_tonly {
 
 
 process varscan_tonly {
+    container = "${params.containers.logan}"
+
     label 'process_somaticcaller'
     input:
-        tuple val(tumorname), path(tumor), path(tumorbai), 
+        tuple val(tumorname), path(tumor), path(tumorbai),
         path(bed),
         path(tumorpileup),  path(tumor_con_table)
-    
+
     output:
         tuple val(tumorname),
         path("${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf.gz")
-    
+
     shell:
 
     '''
@@ -247,8 +261,8 @@ process varscan_tonly {
     awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf_temp \
         | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf
 
-    printf "TUMOR\t!{tumorname}\n" > sampname 
-    
+    printf "TUMOR\t!{tumorname}\n" > sampname
+
     bcftools reheader -s sampname !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf \
         | bcftools view -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf.gz
 
@@ -263,14 +277,16 @@ process varscan_tonly {
 
 
 process vardict_tonly {
+    container = "${params.containers.logan}"
+
     label 'process_highcpu'
     input:
         tuple val(tumorname), path(tumor), path(tumorbai), path(bed)
-    
+
     output:
         tuple val(tumorname),
         path("${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz")
-    
+
     script:
 
     """
@@ -291,15 +307,15 @@ process vardict_tonly {
             -E \
             -f 0.05 >  ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf
 
-    printf "${tumor.Name}\t${tumorname}\n" > sampname 
-    
+    printf "${tumor.Name}\t${tumorname}\n" > sampname
+
     bcftools reheader -s sampname ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf \
         | bcftools view -Oz -o ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz
 
     """
 
     stub:
-    
+
     """
     touch ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz
 
@@ -309,15 +325,16 @@ process vardict_tonly {
 
 
 process octopus_tonly {
+    container = "${params.containers.octopus}"
     //label 'process_highcpu'
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai), path(bed)
-    
+
     output:
         tuple val(tumorname),
         path("${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz")
-    
+
     script:
 
     """
@@ -332,7 +349,7 @@ process octopus_tonly {
     """
 
     stub:
-    
+
     """
     touch ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz
     """
@@ -341,11 +358,13 @@ process octopus_tonly {
 
 
 process somaticcombine_tonly {
+    container = "${params.containers.logan}"
+
     label 'process_mid'
     publishDir(path: "${outdir}/vcfs/combined_tonly", mode: 'copy')
 
-    input: 
-        tuple val(tumorsample), 
+    input:
+        tuple val(tumorsample),
         val(callers),
         path(vcfs), path(vcfindex)
 
@@ -376,11 +395,13 @@ process somaticcombine_tonly {
 }
 
 process annotvep_tonly {
+    container = "${params.containers.vcf2maf}"
+
     publishDir("${outdir}/mafs", mode: "copy")
 
     input:
-        tuple val(tumorsample), 
-        val(vc), path(tumorvcf), 
+        tuple val(tumorsample),
+        val(vc), path(tumorvcf),
         path(vcfindex)
 
 
@@ -397,15 +418,15 @@ process annotvep_tonly {
     NORM_VCF_ID_ARG=""
     NSAMPLES=${#VCF_SAMPLE_IDS[@]}
     if [ $NSAMPLES -gt 1 ]; then
-        # Assign tumor, normal IDs 
-        # Look through column names and 
+        # Assign tumor, normal IDs
+        # Look through column names and
         # see if they match provided IDs
         for (( i = 0; i < $NSAMPLES; i++ )); do
             echo "${VCF_SAMPLE_IDS[$i]}"
             if [ "${VCF_SAMPLE_IDS[$i]}" == !{tumorsample} ]; then
                 TID_IDX=$i
             fi
-            
+
         done
 
         if [ ! -z $NID_IDX ]; then
@@ -414,9 +435,9 @@ process annotvep_tonly {
         fi
     fi
     VCF_TID=${VCF_SAMPLE_IDS[$TID_IDX]}
-   
+
     zcat !{tumorvcf} > !{tumorvcf.baseName}
-    
+
     mkdir -p tumor_only/!{vc}
 
     vcf2maf.pl \
@@ -439,9 +460,10 @@ process annotvep_tonly {
 }
 
 process combinemafs_tonly {
+    container = "${params.containers.logan}"
     label 'process_low'
 
-    input: 
+    input:
         path(allmafs)
 
     output:
@@ -449,7 +471,7 @@ process combinemafs_tonly {
 
     shell:
     mafin= allmafs.join(" ")
-    
+
     """
     echo "Combining MAFs..."
     head -2 ${allmafs[0]} > final_tonly.maf
@@ -461,6 +483,3 @@ process combinemafs_tonly {
     touch final_tonly.maf
     """
 }
-
-
-
diff --git a/nextflow.config b/nextflow.config
index 3a3392d..6f66adb 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -7,6 +7,7 @@ manifest {
     mainScript = "main.nf"
 }
 
+
 includeConfig 'conf/containers.config'
 includeConfig 'conf/genomes.config'
 includeConfig 'conf/base.config'
@@ -73,7 +74,7 @@ profiles {
         cacheDir = "$PWD/singularity"
         envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID'
         runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb'
-        }
+    }
     biowulf {
         includeConfig 'conf/biowulf.config'
     }

From a06fa5d0428dbe4b97b6570415c86810334eb372 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Tue, 9 Jan 2024 20:06:17 -0500
Subject: [PATCH 39/58] fix: bam cnv mode

---
 main.nf | 47 +++++++++++++++++++++++------------------------
 1 file changed, 23 insertions(+), 24 deletions(-)

diff --git a/main.nf b/main.nf
index 2c8367b..0a715e1 100644
--- a/main.nf
+++ b/main.nf
@@ -5,12 +5,12 @@ date = new Date().format( 'yyyyMMdd' )
 
 
 log.info """\
-         L O G A N     P I P E L I N E    
+         L O G A N     P I P E L I N E
          =============================
          genome: ${params.genome}
          outdir: ${params.outdir}
          Sample Sheet: ${params.sample_sheet}
-         Samples: ${params.fastq_input} ${params.fastq_file_input} ${params.bam_input} ${params.bam_file_input} 
+         Samples: ${params.fastq_input} ${params.fastq_file_input} ${params.bam_input} ${params.bam_file_input}
          """
          .stripIndent()
 
@@ -49,10 +49,10 @@ workflow {
         //Tumor-Normal VC, SV, CNV
         if (params.vc){
             VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet)
-        }   
+        }
         if (params.sv){
             SV(ALIGN.out.bamwithsample)
-        }  
+        }
         if (params.cnv){
             if (params.genome == "mm10"){
                 CNVmouse(ALIGN.out.bamwithsample)
@@ -64,25 +64,25 @@ workflow {
                     CNVhuman(ALIGN.out.bamwithsample,VC.out.somaticcall_input)
                 }
             }
-    }  
+    }
         if (params.qc && params.gl){
             QC_GL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout,GL.out.glnexusout,GL.out.bcfout)
         }  else if (params.qc){
             QC_NOGL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout)
-        }  
+        }
 
     }
-    
+
     //TUMOR-NOMRAL BAM INPUT
     if ([params.bam_input,params.bam_file_input].any() && params.sample_sheet){
         println "Tumor-Normal with BAMs"
         INPUT_BAM()
         if (params.vc){
             VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet)
-        }  
+        }
         if (params.sv){
             SV(INPUT_BAM.out.bamwithsample)
-        }  
+        }
         if (params.cnv){
             if (params.genome == "mm10"){
                 CNVmouse(INPUT_BAM.out.bamwithsample)
@@ -90,13 +90,13 @@ workflow {
                 if (!params.vc){
                     VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet)
                     CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input)
-                }else { 
+                }else {
                     CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input)
                 }
             }
         }
-    }  
-    
+    }
+
     ///Tumor Only Pipelines
     if ([params.fastq_input,params.file_input].any() && !params.sample_sheet){
         println "Tumor-Only FASTQ"
@@ -115,7 +115,7 @@ workflow {
                 if (!params.vc){
                     VC_TONLY(ALIGN_TONLY.out.bamwithsample,ALIGN_TONLY.out.splitout,ALIGN_TONLY.out.sample_sheet)
                     CNVhuman_tonly(ALIGN_TONLY.out.bamwithsample,VC_TONLY.out.somaticcall_input)
-                } else{
+                } else {
                     CNVhuman_tonly(ALIGN_TONLY.out.bamwithsample,VC_TONLY.out.somaticcall_input)
                 }
             }
@@ -134,20 +134,19 @@ workflow {
         }
         if (params.sv){
             SV_TONLY(INPUT_TONLY_BAM.out.bamwithsample)
-        }  
+        }
         if (params.cnv){
             if (params.genome == "mm10"){
                 CNVmouse_tonly(INPUT_TONLY_BAM.out.bamwithsample)
             } else if (params.genome== "hg38"){
-                VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet)
-                CNVhuman_tonly(INPUT_TONLY_BAM.out.bamwithsample,VC_TONLY.out.somaticcall_input)
-            }
-        }  
-    
-    }
+                if (!params.vc){
+                    VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet)
+                    CNVhuman_tonly(INPUT_TONLY_BAM.out.bamwithsample,VC_TONLY.out.somaticcall_input)
+                } else {
+                    CNVhuman_tonly(INPUT_TONLY_BAM.out.bamwithsample,VC_TONLY.out.somaticcall_input)
+                }
 
+        }
+        }
+    }
 }
-
-    
-
-

From 1484caea5a91395c92b4a233165e3affe5fdae25 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Wed, 10 Jan 2024 10:58:19 -0500
Subject: [PATCH 40/58] fix: update task (cpu/mem) profiles and output dirs

---
 conf/base.config                       | 11 +++++++++
 conf/modules.config                    | 12 ++++++----
 modules/local/copynumber.nf            | 20 +++++-----------
 modules/local/qc.nf                    | 10 ++++----
 modules/local/structural_variant.nf    | 32 ++++++++++++--------------
 modules/local/trim_align.nf            |  7 +++---
 modules/local/variant_calling.nf       | 10 ++++----
 modules/local/variant_calling_tonly.nf |  9 +++-----
 8 files changed, 56 insertions(+), 55 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index c525125..e6f151c 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -51,6 +51,16 @@ process {
     withLabel:process_high_memory {
         memory = { check_max( 200.GB * task.attempt, 'memory' ) }
     }
+    withLabel:process_somaticcaller {
+        cpus   = { check_max( 4     * task.attempt, 'cpus'    ) }
+        memory = { check_max( 64.GB * task.attempt, 'memory'  ) }
+        time   = { check_max( 72.h   * task.attempt, 'time'    ) }
+    }
+    withLabel:process_highmem {
+        cpus   = { check_max( 4     * task.attempt, 'cpus'    ) }
+        memory = { check_max( 48.GB * task.attempt, 'memory'  ) }
+        time   = { check_max( 72.h   * task.attempt, 'time'    ) }
+    }
     withLabel:error_ignore {
         errorStrategy = 'ignore'
     }
@@ -58,4 +68,5 @@ process {
         errorStrategy = 'retry'
         maxRetries    = 2
     }
+
 }
diff --git a/conf/modules.config b/conf/modules.config
index a5ff333..f3b5495 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -9,7 +9,6 @@ process {
     errorStrategy = 'finish'
 
     withName:'sequenza' {
-        container = 'dnousome/ccbr_logan_base:v0.3.3'
         publishDir = [
             path: { "${params.outdir}/cnv/sequenza" },
             mode: 'copy'
@@ -22,12 +21,9 @@ process {
             path: { "${params.outdir}/cnv/freec_paired" },
             mode: 'copy'
         ]
-        container = 'dnousome/ccbr_logan_base:v0.3.3'
-
     }
 
     withName:'freec' {
-        container = 'dnousome/ccbr_logan_base:v0.3.3'
         publishDir = [
             path: { "${params.outdir}/cnv/freec_unpaired" },
             mode: 'copy'
@@ -292,4 +288,12 @@ process {
             mode: 'copy'
         ]
     }
+
+
+  withName: 'octopus_tn|octopus_tonly' {
+        memory = 72.GB
+        time = 24.h
+        cpus = 16
+    }
+
 }
diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf
index e5e0691..0f4a92b 100644
--- a/modules/local/copynumber.nf
+++ b/modules/local/copynumber.nf
@@ -127,9 +127,7 @@ process sequenza {
 
 process freec_paired {
     container = "${params.containers.logan}"
-
     label 'process_highcpu'
-    publishDir("${outdir}/cnv/freec_paired", mode: 'copy')
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai),
@@ -194,9 +192,7 @@ process freec_paired {
 
 process freec {
     container = "${params.containers.logan}"
-
-    label 'process_mid'
-    publishDir("${outdir}/cnv/freec_unpaired", mode: 'copy')
+    label 'process_medium'
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai)
@@ -260,7 +256,7 @@ process freec {
 process amber_tonly {
     container = "${params.containers.logan}"
 
-    label 'process_mid'
+    label 'process_medium'
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai)
@@ -297,7 +293,7 @@ process amber_tonly {
 process amber_tn {
     container = "${params.containers.logan}"
 
-    label 'process_mid'
+    label 'process_medium'
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai),
@@ -335,7 +331,7 @@ process amber_tn {
 process cobalt_tonly {
     container = "${params.containers.logan}"
 
-    label "process_mid"
+    label 'process_medium'
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai)
@@ -370,7 +366,7 @@ process cobalt_tonly {
 process cobalt_tn {
     container = "${params.containers.logan}"
 
-    label "process_mid"
+    label 'process_medium'
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai),
@@ -407,9 +403,7 @@ process cobalt_tn {
 
 process purple {
     container = "${params.containers.logan}"
-
-    label 'process_mid'
-    publishDir("${outdir}/cnv/purple", mode: 'copy')
+    label 'process_medium'
 
     input:
         tuple val(tumorname),
@@ -451,8 +445,6 @@ process purple {
 process ascat_tn {
     module=["java/12.0.1","R/3.6.3"]
 
-    publishDir("${outdir}/purple", mode: 'copy')
-
     input:
         tuple val(samplename), path(cobaltin), path(amberin), path("${samplename}.tonly.final.mut2.vcf.gz")
 
diff --git a/modules/local/qc.nf b/modules/local/qc.nf
index 04b8022..f8bbc89 100644
--- a/modules/local/qc.nf
+++ b/modules/local/qc.nf
@@ -226,7 +226,7 @@ process samtools_flagstats {
     @Output:
         Text file containing alignment statistics
     */
-    label 'process_mid'
+    label 'process_medium'
 
     input:
         tuple val(samplename), path(bam), path(bai)
@@ -296,7 +296,7 @@ process vcftools {
     @Output:
         Text file containing a measure of heterozygosity
     */
-    label 'process_mid'
+    label 'process_medium'
 
 
     input:
@@ -366,7 +366,7 @@ process bcftools_stats {
         Text file containing a collection of summary statistics
     */
 
-    label 'process_mid'
+    label 'process_medium'
 
     input:
         tuple val(samplename),  path("${samplename}.gvcf.gz"),path("${samplename}.gvcf.gz.tbi")
@@ -398,7 +398,7 @@ process gatk_varianteval {
     @Output:
         Evaluation table containing a collection of summary statistics
     */
-    label 'process_mid'
+    label 'process_medium'
 
     input:
         tuple val(samplename), path("${samplename}.gvcf.gz") ,path("${samplename}.gvcf.gz.tbi")
@@ -440,7 +440,7 @@ process snpeff {
     @Output:
         Evaluation table containing a collection of summary statistics
     */
-    label 'process_mid'
+    label 'process_medium'
 
     input:
         tuple val(samplename), path("${samplename}.gvcf.gz"), path("${samplename}.gvcf.gz.tbi")
diff --git a/modules/local/structural_variant.nf b/modules/local/structural_variant.nf
index a6f58f4..dda67c7 100644
--- a/modules/local/structural_variant.nf
+++ b/modules/local/structural_variant.nf
@@ -1,7 +1,7 @@
 GENOMEREF=file(params.genomes[params.genome].genome)
 GENOME=params.genome
 BWAGENOME=file(params.genomes[params.genome].bwagenome)
-DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) 
+DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS)
 
 
 
@@ -10,7 +10,7 @@ process svaba_somatic {
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai)
-    
+
     output:
         tuple val(tumorname),
         path("${tumor.simpleName}.bps.txt.gz"),
@@ -34,7 +34,7 @@ process svaba_somatic {
     """
 
     stub:
-    
+
     """
     touch "${tumor.simpleName}.bps.txt.gz"
     touch "${tumor.simpleName}.contigs.bam"
@@ -61,7 +61,7 @@ process manta_somatic {
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai),val(normalname), path(normal), path(normalbai)
-    
+
     output:
         tuple val(tumorname),
         path("${tumor.simpleName}.diplodSV.vcf.gz"),
@@ -80,7 +80,7 @@ process manta_somatic {
         --runDir=wd
 
     wd/runWorkflow.py -m local -j 10 -g 10
-    
+
     mv wd/results/variants/diploidSV.vcf.gz ${tumor.simpleName}.diplodSV.vcf.gz
     mv wd/results/variants/somaticSV.vcf.gz ${tumor.simpleName}.somaticSV.vcf.gz
     mv wd/results/variants/candidateSV.vcf.gz ${tumor.simpleName}.candidateSV.vcf.gz
@@ -89,7 +89,7 @@ process manta_somatic {
     """
 
     stub:
-    
+
     """
     touch ${tumor.simpleName}.diplodSV.vcf.gz
     touch ${tumor.simpleName}.somaticSV.vcf.gz
@@ -140,7 +140,7 @@ process manta_tonly {
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai)
-    
+
     output:
         tuple val(tumorname),
         path("${tumor.simpleName}.candidateSV.vcf.gz"),
@@ -158,7 +158,7 @@ process manta_tonly {
         --runDir=wd
 
     wd/runWorkflow.py -m local -j 10 -g 10
-    
+
     mv wd/results/variants/candidateSV.vcf.gz ${tumor.simpleName}.candidateSV.vcf.gz
     mv wd/results/variants/candidateSmallIndels.vcf.gz ${tumor.simpleName}.candidateSmallIndels.vcf.gz
     mv wd/results/variants/tumorSV.vcf.gz ${tumor.simpleName}.tumorSV.vcf.gz
@@ -166,7 +166,7 @@ process manta_tonly {
     """
 
     stub:
-    
+
     """
     touch ${tumor.simpleName}.candidateSV.vcf.gz
     touch ${tumor.simpleName}.candidateSmallIndels.vcf.gz
@@ -182,7 +182,7 @@ process svaba_tonly {
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai)
-    
+
     output:
         tuple val(tumorname),
         path("${tumor.simpleName}.bps.txt.gz"),
@@ -202,7 +202,7 @@ process svaba_tonly {
     """
 
     stub:
-    
+
     """
     touch "${tumor.simpleName}.bps.txt.gz"
     touch "${tumor.simpleName}.contigs.bam"
@@ -221,11 +221,11 @@ process svaba_tonly {
 process gunzip {
 
     input:
-        tuple val(tumorname), 
+        tuple val(tumorname),
         path(vcf), val(sv)
 
     output:
-        tuple val(tumorname), 
+        tuple val(tumorname),
         path("${tumorname}.tumorSV.vcf"), val(sv)
 
     script:
@@ -246,7 +246,7 @@ process survivor_sv {
     module = ['survivor']
 
     input:
-        tuple val(tumorname), 
+        tuple val(tumorname),
         path(vcfs),val(svs)
 
     output:
@@ -275,9 +275,7 @@ process survivor_sv {
 process annotsv_tonly {
      //AnnotSV for Manta/Svaba works with either vcf.gz or .vcf files
      //Requires bedtools,bcftools
-
     module = ['annotsv/3.3.1']
-    publishDir(path: "${outdir}/SV/annotated_tonly", mode: 'copy') 
 
     input:
         tuple val(tumorname), path(somaticvcf), val(sv)
@@ -306,4 +304,4 @@ process annotsv_tonly {
     touch "${sv}/${tumorname}.tsv"
     touch "${sv}/${tumorname}.unannotated.tsv"
     """
-}
\ No newline at end of file
+}
diff --git a/modules/local/trim_align.nf b/modules/local/trim_align.nf
index 8a69287..1ceb063 100644
--- a/modules/local/trim_align.nf
+++ b/modules/local/trim_align.nf
@@ -4,7 +4,7 @@ KNOWNRECAL = params.genomes[params.genome].KNOWNRECAL
 
 process fastp {
     container = "${params.containers.logan}"
-    label 'process_mid'
+    label 'process_medium'
     tag { name }
 
     input:
@@ -169,8 +169,7 @@ process applybqsr {
 
 process samtoolsindex {
     container = "${params.containers.logan}"
-    label 'process_mid'
-    publishDir(path: "${outdir}/bams/BQSR", mode: 'copy')
+    label 'process_medium'
 
     input:
     tuple val(bamname), path(bam)
@@ -193,7 +192,7 @@ process samtoolsindex {
 //Save to CRAM for output
 process bamtocram_tonly {
     container = "${params.containers.logan}"
-    label 'process_mid'
+    label 'process_medium'
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai)
diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf
index c580c2b..1d8e560 100644
--- a/modules/local/variant_calling.nf
+++ b/modules/local/variant_calling.nf
@@ -224,7 +224,7 @@ process mergemut2stats {
 
 process mutect2filter {
     container = "${params.containers.logan}"
-    label 'process_mid'
+    label 'process_medium'
 
     input:
         tuple val(tumor), val(normal),path(mutvcfs), path(stats), path(obs),
@@ -687,7 +687,7 @@ process bcftools_index_octopus {
 process combineVariants_strelka {
     //Concat all somatic snvs/indels across all files, strelka separates snv/indels
     container = "${params.containers.logan}"
-    label 'process_mid'
+    label 'process_medium'
 
     input:
         tuple val(sample),
@@ -733,7 +733,7 @@ process combineVariants_strelka {
 
 process somaticcombine {
     container = "${params.containers.logan}"
-    label 'process_mid'
+    label 'process_medium'
 
     input:
         tuple val(tumorsample), val(normal),
@@ -772,7 +772,9 @@ process somaticcombine {
 
 
 process annotvep_tn {
+    label 'process_medium'
     container = "${params.containers.vcf2maf}"
+
     input:
         tuple val(tumorsample), val(normalsample),
         val(vc), path(tumorvcf), path(vcfindex)
@@ -837,9 +839,7 @@ process annotvep_tn {
 
 process combinemafs_tn {
     container = "${params.containers.logan}"
-
     label 'process_low'
-    publishDir(path: "${outdir}/mafs/paired", mode: 'copy')
 
     input:
         path(allmafs)
diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf
index bdc5731..02a3435 100644
--- a/modules/local/variant_calling_tonly.nf
+++ b/modules/local/variant_calling_tonly.nf
@@ -186,7 +186,7 @@ process mutect2_t_tonly {
 process mutect2filter_tonly {
     container = "${params.containers.logan}"
 
-    label 'process_mid'
+    label 'process_medium'
 
     input:
         tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups),path(tumorcontamination)
@@ -359,9 +359,7 @@ process octopus_tonly {
 
 process somaticcombine_tonly {
     container = "${params.containers.logan}"
-
-    label 'process_mid'
-    publishDir(path: "${outdir}/vcfs/combined_tonly", mode: 'copy')
+    label 'process_medium'
 
     input:
         tuple val(tumorsample),
@@ -396,8 +394,7 @@ process somaticcombine_tonly {
 
 process annotvep_tonly {
     container = "${params.containers.vcf2maf}"
-
-    publishDir("${outdir}/mafs", mode: "copy")
+    label 'process_medium'
 
     input:
         tuple val(tumorsample),

From 8b9fb63e969a1bed786d0f3a95a03f6358f47ff0 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Wed, 10 Jan 2024 13:22:16 -0500
Subject: [PATCH 41/58] fix: increase mem for vcs

---
 conf/base.config                       | 5 +++++
 conf/modules.config                    | 7 -------
 modules/local/variant_calling.nf       | 2 +-
 modules/local/variant_calling_tonly.nf | 2 +-
 4 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index e6f151c..0eb1dee 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -56,6 +56,11 @@ process {
         memory = { check_max( 64.GB * task.attempt, 'memory'  ) }
         time   = { check_max( 72.h   * task.attempt, 'time'    ) }
     }
+    withLabel:process_somaticcaller_high {
+        cpus   = { check_max( 16     * task.attempt, 'cpus'    ) }
+        memory = { check_max( 70.GB * task.attempt, 'memory'  ) }
+        time   = { check_max( 48.h   * task.attempt, 'time'    ) }
+    }
     withLabel:process_highmem {
         cpus   = { check_max( 4     * task.attempt, 'cpus'    ) }
         memory = { check_max( 48.GB * task.attempt, 'memory'  ) }
diff --git a/conf/modules.config b/conf/modules.config
index f3b5495..a9bb769 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -289,11 +289,4 @@ process {
         ]
     }
 
-
-  withName: 'octopus_tn|octopus_tonly' {
-        memory = 72.GB
-        time = 24.h
-        cpus = 16
-    }
-
 }
diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf
index 1d8e560..2b87dfa 100644
--- a/modules/local/variant_calling.nf
+++ b/modules/local/variant_calling.nf
@@ -429,7 +429,7 @@ process varscan_tn {
 
 process octopus_tn {
     container = "${params.containers.octopus}"
-    //label 'process_highcpu' Using separate docker for octopus
+    label 'process_somaticcaller_high'
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai),
diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf
index 02a3435..9324011 100644
--- a/modules/local/variant_calling_tonly.nf
+++ b/modules/local/variant_calling_tonly.nf
@@ -326,7 +326,7 @@ process vardict_tonly {
 
 process octopus_tonly {
     container = "${params.containers.octopus}"
-    //label 'process_highcpu'
+    label 'process_somaticcaller_high'
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai), path(bed)

From 99145a82fb3c06fc321fb246f0a50046d201d87d Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Wed, 10 Jan 2024 22:03:35 -0500
Subject: [PATCH 42/58] fix: vc cpus

---
 modules/local/variant_calling.nf       | 40 +++++++++++++-------------
 modules/local/variant_calling_tonly.nf | 36 +++++++++++------------
 2 files changed, 36 insertions(+), 40 deletions(-)

diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf
index 2b87dfa..2d01e01 100644
--- a/modules/local/variant_calling.nf
+++ b/modules/local/variant_calling.nf
@@ -15,7 +15,7 @@ LOFREQ_CONVERT=params.lofreq_convert
 
 
 process mutect2 {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_somaticcaller'
 
     input:
@@ -56,7 +56,7 @@ process mutect2 {
 
 
 process pileup_paired_t {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_highmem'
 
     input:
@@ -86,7 +86,7 @@ process pileup_paired_t {
 
 
 process pileup_paired_n {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_highmem'
 
     input:
@@ -116,7 +116,7 @@ process pileup_paired_n {
 
 
 process contamination_paired {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_highmem'
 
     input:
@@ -170,7 +170,7 @@ process contamination_paired {
 
 
 process learnreadorientationmodel {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_highmem'
 
     input:
@@ -196,7 +196,7 @@ process learnreadorientationmodel {
 
 
 process mergemut2stats {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_low'
 
     input:
@@ -223,7 +223,7 @@ process mergemut2stats {
 
 
 process mutect2filter {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_medium'
 
     input:
@@ -274,7 +274,7 @@ process mutect2filter {
 
 
 process strelka_tn {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_highcpu'
     input:
         tuple val(tumorname), path(tumor), path(tumorbai),
@@ -329,7 +329,7 @@ process strelka_tn {
 
 
 process vardict_tn {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_highcpu'
 
     input:
@@ -379,7 +379,7 @@ process vardict_tn {
 
 
 process varscan_tn {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_somaticcaller'
 
     input:
@@ -428,7 +428,7 @@ process varscan_tn {
 
 
 process octopus_tn {
-    container = "${params.containers.octopus}"
+    container "${params.containers.octopus}"
     label 'process_somaticcaller_high'
 
     input:
@@ -463,7 +463,7 @@ process octopus_tn {
 
 
 process lofreq_tn {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_somaticcaller'
 
     input:
@@ -523,7 +523,7 @@ process lofreq_tn {
 
 
 process muse_tn {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_somaticcaller'
     input:
         tuple val(tumorname), path(tumor), path(tumorbai),
@@ -560,7 +560,7 @@ process muse_tn {
 
 
 process combineVariants {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_highmem'
 
     input:
@@ -610,7 +610,7 @@ process combineVariants {
 
 
 process combineVariants_alternative {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_highmem'
 
     input:
@@ -658,7 +658,7 @@ process combineVariants_alternative {
 
 
 process bcftools_index_octopus {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_low'
 
     input:
@@ -686,7 +686,7 @@ process bcftools_index_octopus {
 
 process combineVariants_strelka {
     //Concat all somatic snvs/indels across all files, strelka separates snv/indels
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_medium'
 
     input:
@@ -732,7 +732,7 @@ process combineVariants_strelka {
 
 
 process somaticcombine {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_medium'
 
     input:
@@ -773,7 +773,7 @@ process somaticcombine {
 
 process annotvep_tn {
     label 'process_medium'
-    container = "${params.containers.vcf2maf}"
+    container "${params.containers.vcf2maf}"
 
     input:
         tuple val(tumorsample), val(normalsample),
@@ -838,7 +838,7 @@ process annotvep_tn {
 
 
 process combinemafs_tn {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_low'
 
     input:
diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf
index 9324011..fb4253e 100644
--- a/modules/local/variant_calling_tonly.nf
+++ b/modules/local/variant_calling_tonly.nf
@@ -13,7 +13,7 @@ GERMLINE_FOREST=params.genomes[params.genome].octopus_gforest
 
 
 process pileup_paired_tonly {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
 
     label 'process_highmem'
 
@@ -45,7 +45,7 @@ process pileup_paired_tonly {
 
 
 process contamination_tumoronly {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
 
     label 'process_highmem'
 
@@ -85,7 +85,7 @@ process contamination_tumoronly {
 
 
 process learnreadorientationmodel_tonly {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
 
     label 'process_highmem'
 
@@ -115,7 +115,7 @@ process learnreadorientationmodel_tonly {
 
 
 process mergemut2stats_tonly {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
 
     label 'process_low'
 
@@ -144,9 +144,9 @@ process mergemut2stats_tonly {
 
 
 process mutect2_t_tonly {
-    container = "${params.containers.logan}"
-
+    container "${params.containers.logan}"
     label 'process_somaticcaller'
+
     input:
         tuple val(tumorname), path(tumor), path(tumorbai), path(bed)
 
@@ -184,7 +184,7 @@ process mutect2_t_tonly {
 
 
 process mutect2filter_tonly {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
 
     label 'process_medium'
 
@@ -219,7 +219,7 @@ process mutect2filter_tonly {
         --output ${sample}.tonly.mut2.final.vcf.gz
 
     bcftools sort ${sample}.tonly.mut2.final.vcf.gz |\
-    bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
+    bcftools norm --threads ${task.cpus} --check-ref s -f $GENOMEREF -O v |\
         awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\
         sed '/^\$/d' |\
     bcftools view - -Oz -o  ${sample}.tonly.mut2.norm.vcf.gz
@@ -237,7 +237,7 @@ process mutect2filter_tonly {
 
 
 process varscan_tonly {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
 
     label 'process_somaticcaller'
     input:
@@ -277,7 +277,7 @@ process varscan_tonly {
 
 
 process vardict_tonly {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
 
     label 'process_highcpu'
     input:
@@ -297,7 +297,7 @@ process vardict_tonly {
         -x 500 \
         --nosv \
         -b ${tumor} --fisher \
-        -t -Q 20 -c 1 -S 2 -E 3 --th $task.cpus \
+        -t -Q 20 -c 1 -S 2 -E 3 --th ${task.cpus} \
         temp_${bed} | var2vcf_valid.pl \
             -N ${tumor} \
             -Q 20 \
@@ -325,7 +325,7 @@ process vardict_tonly {
 
 
 process octopus_tonly {
-    container = "${params.containers.octopus}"
+    container "${params.containers.octopus}"
     label 'process_somaticcaller_high'
 
     input:
@@ -336,20 +336,16 @@ process octopus_tonly {
         path("${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz")
 
     script:
-
     """
     octopus -R $GENOMEREF -C cancer -I ${tumor} \
     --annotations AC AD DP \
     --target-working-memory 64Gb \
     -t ${bed} \
     $SOMATIC_FOREST \
-    -o ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz --threads $task.cpus
-
-
+    -o ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz --threads ${task.cpus}
     """
 
     stub:
-
     """
     touch ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz
     """
@@ -358,7 +354,7 @@ process octopus_tonly {
 
 
 process somaticcombine_tonly {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_medium'
 
     input:
@@ -393,7 +389,7 @@ process somaticcombine_tonly {
 }
 
 process annotvep_tonly {
-    container = "${params.containers.vcf2maf}"
+    container "${params.containers.vcf2maf}"
     label 'process_medium'
 
     input:
@@ -457,7 +453,7 @@ process annotvep_tonly {
 }
 
 process combinemafs_tonly {
-    container = "${params.containers.logan}"
+    container "${params.containers.logan}"
     label 'process_low'
 
     input:

From c08e15697fab72f0760a620894ae5f1604da8fe3 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Fri, 12 Jan 2024 11:11:56 -0500
Subject: [PATCH 43/58] fix: increase mem for vardict

---
 conf/modules.config                    | 3 +--
 modules/local/variant_calling.nf       | 2 +-
 modules/local/variant_calling_tonly.nf | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index a9bb769..d171cef 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -15,7 +15,6 @@ process {
         ]
     }
 
-
     withName: 'freec_paired' {
         publishDir = [
             path: { "${params.outdir}/cnv/freec_paired" },
@@ -23,7 +22,7 @@ process {
         ]
     }
 
-    withName:'freec' {
+    withName: 'freec' {
         publishDir = [
             path: { "${params.outdir}/cnv/freec_unpaired" },
             mode: 'copy'
diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf
index 2d01e01..ef25413 100644
--- a/modules/local/variant_calling.nf
+++ b/modules/local/variant_calling.nf
@@ -330,7 +330,7 @@ process strelka_tn {
 
 process vardict_tn {
     container "${params.containers.logan}"
-    label 'process_highcpu'
+    label 'process_somaticcaller_high'
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed)
diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf
index fb4253e..7cc8cc7 100644
--- a/modules/local/variant_calling_tonly.nf
+++ b/modules/local/variant_calling_tonly.nf
@@ -278,8 +278,8 @@ process varscan_tonly {
 
 process vardict_tonly {
     container "${params.containers.logan}"
+    label 'process_somaticcaller_high'
 
-    label 'process_highcpu'
     input:
         tuple val(tumorname), path(tumor), path(tumorbai), path(bed)
 

From dfce46994d773b8b383b26dea881dd4f0d078819 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Fri, 12 Jan 2024 15:46:41 -0500
Subject: [PATCH 44/58] style: spacing

---
 modules/local/variant_calling_tonly.nf | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf
index 7cc8cc7..c063f2a 100644
--- a/modules/local/variant_calling_tonly.nf
+++ b/modules/local/variant_calling_tonly.nf
@@ -185,7 +185,6 @@ process mutect2_t_tonly {
 
 process mutect2filter_tonly {
     container "${params.containers.logan}"
-
     label 'process_medium'
 
     input:

From 92fb74a65232adf77b17d6e0bb2bb0ee4ff4d1da Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Mon, 15 Jan 2024 11:31:01 -0500
Subject: [PATCH 45/58] feat: revert to GATK3

---
 conf/containers.config                 |  2 +-
 modules/local/variant_calling.nf       | 71 +++++++++++++++++++++++++-
 modules/local/variant_calling_tonly.nf | 66 +++++++++++++++++++++++-
 subworkflows/local/workflows.nf        | 12 +++--
 subworkflows/local/workflows_tonly.nf  |  8 +--
 5 files changed, 149 insertions(+), 10 deletions(-)

diff --git a/conf/containers.config b/conf/containers.config
index 2ceaf3c..c929fec 100644
--- a/conf/containers.config
+++ b/conf/containers.config
@@ -2,7 +2,7 @@
 params {
     containers {
         base = 'nciccbr/ccbr_ubuntu_base_20.04:v6.1'
-        logan = 'docker://dnousome/ccbr_logan_base:v0.3.3'
+        logan = 'docker://dnousome/ccbr_logan_base:v0.3.4'
         vcf2maf = 'docker://dnousome/ccbr_vcf2maf:v102.0.0'
         octopus = 'docker://dancooke/octopus:latest'
 
diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf
index ef25413..03b17b7 100644
--- a/modules/local/variant_calling.nf
+++ b/modules/local/variant_calling.nf
@@ -221,6 +221,32 @@ process mergemut2stats {
 
 }
 
+process octopus_convertvcf {
+    container "${params.containers.logan}"
+    label 'process_low'
+    
+    input:
+        tuple val(tumor), val(normal), 
+        val(oct), path(vcf), path(vcfindex)
+
+    output:
+        tuple val(tumor), val(normal), path("${tumor}.octopus.norm.vcf.gz"), 
+        path("${tumor}.octopus.norm.vcf.gz.tbi")
+
+
+    script:
+    """
+    zcat ${vcf}  | sed 's/^##fileformat=VCFv4.3/##fileformat=VCFv4.2/'  > ${tumor}_temp.octopus.norm.vcf
+    bgzip ${tumor}_temp.octopus.norm.vcf
+    mv ${tumor}_temp.octopus.norm.vcf.gz ${tumor}.octopus.norm.vcf.gz
+    bcftools index -t ${tumor}.octopus.norm.vcf.gz -f
+    """
+
+    stub:
+    """
+    touch ${tumor}.octopus.norm.vcf.gz ${tumor}.octopus.norm.vcf.gz.tbi
+    """
+}
 
 process mutect2filter {
     container "${params.containers.logan}"
@@ -731,6 +757,49 @@ process combineVariants_strelka {
 }
 
 
+process somaticcombine {
+    container "${params.containers.logan}"
+    label 'process_medium'
+
+    input:
+        tuple val(tumorsample), val(normal),
+        val(callers),
+        path(vcfs), path(vcfindex)
+
+    output:
+        tuple val(tumorsample), val(normal),
+        path("${tumorsample}_vs_${normal}_combined.vcf.gz"),
+        path("${tumorsample}_vs_${normal}_combined.vcf.gz.tbi")
+
+    script:
+        vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b }
+        vcfin2="-V:" + vcfin1.join(" -V:")
+
+    """
+    /usr/lib/jvm/java-8-openjdk-amd64/bin/java -jar \$GATK_JAR -T CombineVariants  \
+        -R $GENOMEREF \
+        --genotypemergeoption PRIORITIZE \
+        --rod_priority_list mutect2,strelka,octopus,muse,lofreq,vardict,varscan \
+        --filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED \
+        -o ${tumorsample}_vs_${normal}_combined.vcf.gz \
+        $vcfin2
+        
+    """
+
+    stub:
+    vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b }
+    vcfin2="-V:" + vcfin1.join(" -V:")
+
+    """
+    touch ${tumorsample}_vs_${normal}_combined.vcf.gz
+    touch ${tumorsample}_vs_${normal}_combined.vcf.gz.tbi
+    """
+
+}
+
+
+
+/*DISCVR
 process somaticcombine {
     container "${params.containers.logan}"
     label 'process_medium'
@@ -769,7 +838,7 @@ process somaticcombine {
     """
 
 }
-
+*/
 
 process annotvep_tn {
     label 'process_medium'
diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf
index c063f2a..3ad791c 100644
--- a/modules/local/variant_calling_tonly.nf
+++ b/modules/local/variant_calling_tonly.nf
@@ -352,6 +352,70 @@ process octopus_tonly {
 
 
 
+process octopus_convertvcf_tonly {
+    container "${params.containers.logan}"
+    label 'process_low'
+    
+    input:
+        tuple val(tumor), val(oct), path(vcf), path(vcfindex)
+
+    output:
+        tuple val(tumor), path("${tumor}.octopus_tonly.norm.vcf.gz"), 
+        path("${tumor}.octopus_tonly.norm.vcf.gz.tbi")
+
+
+    script:
+    """
+    zcat ${vcf}  | sed 's/^##fileformat=VCFv4.3/##fileformat=VCFv4.2/'  > ${tumor}_temp.octopus_tonly.norm.vcf
+    bgzip ${tumor}_temp.octopus_tonly.norm.vcf
+    mv ${tumor}_temp.octopus_tonly.norm.vcf.gz ${tumor}.octopus_tonly.norm.vcf.gz
+    bcftools index -t ${tumor}.octopus_tonly.norm.vcf.gz -f
+    """
+
+    stub:
+    """
+    touch ${tumor}.octopus_tonly.norm.vcf.gz ${tumor}.octopus_tonly.norm.vcf.gz.tbi
+    """
+}
+
+
+process somaticcombine_tonly {
+    container "${params.containers.logan}"
+    label 'process_medium'
+
+    input:
+        tuple val(tumorsample),
+        val(callers),
+        path(vcfs), path(vcfindex)
+
+    output:
+        tuple val(tumorsample),
+        path("${tumorsample}_combined_tonly.vcf.gz"),
+        path("${tumorsample}_combined_tonly.vcf.gz.tbi")
+
+    script:
+        vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b }
+        vcfin2="-V:" + vcfin1.join(" -V:")
+
+    """
+    /usr/lib/jvm/java-8-openjdk-amd64/bin/java -jar \$GATK_JAR -T CombineVariants  \
+        -R $GENOMEREF \
+        --genotypemergeoption PRIORITIZE \
+        --rod_priority_list mutect2_tonly,octopus_tonly,vardict_tonly,varscan_tonly \
+        --filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED \
+        -o ${tumorsample}_combined_tonly.vcf.gz \
+        $vcfin2
+    """
+
+    stub:
+    """
+    touch ${tumorsample}_combined_tonly.vcf.gz ${tumorsample}_combined_tonly.vcf.gz.tbi
+    """
+
+}
+
+
+/*DISCVRSeq
 process somaticcombine_tonly {
     container "${params.containers.logan}"
     label 'process_medium'
@@ -386,7 +450,7 @@ process somaticcombine_tonly {
     """
 
 }
-
+*/
 process annotvep_tonly {
     container "${params.containers.vcf2maf}"
     label 'process_medium'
diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf
index 766a41a..d7fdcfe 100644
--- a/subworkflows/local/workflows.nf
+++ b/subworkflows/local/workflows.nf
@@ -19,7 +19,7 @@ include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n;
     contamination_paired; learnreadorientationmodel;mergemut2stats;
     strelka_tn; combineVariants_strelka;
     varscan_tn; vardict_tn; lofreq_tn; muse_tn;
-    octopus_tn; bcftools_index_octopus; bcftools_index_octopus as bcftools_index_octopus_tonly;
+    octopus_tn; bcftools_index_octopus; bcftools_index_octopus as bcftools_index_octopus_tonly; octopus_convertvcf; 
     combineVariants as combineVariants_vardict; combineVariants as combineVariants_vardict_tonly;
     combineVariants as combineVariants_varscan; combineVariants as combineVariants_varscan_tonly;
     combineVariants_alternative as combineVariants_lofreq; combineVariants as combineVariants_muse;
@@ -34,7 +34,7 @@ include {mutect2_t_tonly; mutect2filter_tonly;
     varscan_tonly; vardict_tonly; octopus_tonly;
     contamination_tumoronly;
     learnreadorientationmodel_tonly;
-    mergemut2stats_tonly;
+    mergemut2stats_tonly; octopus_convertvcf_tonly;
     annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict;
     annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus;
     annotvep_tonly as annotvep_tonly_combined;
@@ -314,6 +314,8 @@ workflow VC {
         | map{samplename,marked,markedindex,normvcf,normindex ->
             tuple(samplename.split('_vs_')[0],samplename.split('_vs_')[1],"octopus",normvcf,normindex)}
     annotvep_tn_octopus(octopus_in)
+    octopus_in_sc = octopus_in | octopus_convertvcf
+        |  map{tumor,vcf,vcfindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} 
 
     //Octopus TOnly
     octopus_in_tonly=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed->
@@ -324,15 +326,17 @@ workflow VC {
         | join(sample_sheet) |
         map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"octopus_tonly",normvcf,normindex)}
     annotvep_tonly_octopus(octopus_in_tonly)
+    octopus_in_tonly_sc=octopus_in_tonly | octopus_convertvcf_tonly
+        | map{tumor,vcf,vcfindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} 
 
     //Combine All Variants Using VCF and Then Reannotate
-    mutect2_in|concat(strelka_in)|concat(octopus_in)|concat(muse_in)|concat(lofreq_in)
+    mutect2_in|concat(strelka_in)|concat(octopus_in_sc)|concat(muse_in)|concat(lofreq_in)
         | concat(vardict_in) |concat(varscan_in) | groupTuple(by:[0,1])
         | somaticcombine
         | map{tumor,normal,vcf,index ->tuple(tumor,normal,"combined",vcf,index)}
         | annotvep_tn_combined
 
-    mutect2_in_tonly|concat(octopus_in_tonly)
+    mutect2_in_tonly|concat(octopus_in_tonly_sc)
         | concat(vardict_in_tonly)|concat(varscan_in_tonly) | groupTuple()
         | somaticcombine_tonly
         | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)}
diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf
index 9f19e1a..9673e9c 100644
--- a/subworkflows/local/workflows_tonly.nf
+++ b/subworkflows/local/workflows_tonly.nf
@@ -32,7 +32,7 @@ include {mutect2_t_tonly; mutect2filter_tonly; pileup_paired_tonly;
     octopus_tonly; 
     contamination_tumoronly;
     learnreadorientationmodel_tonly; 
-    mergemut2stats_tonly;
+    mergemut2stats_tonly; octopus_convertvcf_tonly;
     annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; 
     annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus;
     annotvep_tonly as annotvep_tonly_combined;
@@ -196,9 +196,11 @@ workflow VC_TONLY {
         | combineVariants_alternative | join(sample_sheet)
         | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} 
     annotvep_tonly_octopus(octopus_in_tonly)
+    octopus_in_tonly_sc=octopus_in_tonly | octopus_convertvcf_tonly 
+        | map{tumor,normvcf,normindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} 
 
-
-    mutect2_tonly_in | concat(octopus_in_tonly)
+    //Combined Variants and Annotated
+    mutect2_tonly_in | concat(octopus_in_tonly_sc)
         | concat(vardict_in_tonly) | concat(varscan_in_tonly)
         | groupTuple()
         | somaticcombine_tonly 

From 37ecadf6c979b9441adea1fe0e94ed3b56d9739a Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Tue, 23 Jan 2024 11:15:01 -0500
Subject: [PATCH 46/58] fix: vcf input order

---
 subworkflows/local/workflows.nf | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf
index d7fdcfe..883f4ca 100644
--- a/subworkflows/local/workflows.nf
+++ b/subworkflows/local/workflows.nf
@@ -314,8 +314,8 @@ workflow VC {
         | map{samplename,marked,markedindex,normvcf,normindex ->
             tuple(samplename.split('_vs_')[0],samplename.split('_vs_')[1],"octopus",normvcf,normindex)}
     annotvep_tn_octopus(octopus_in)
-    octopus_in_sc = octopus_in | octopus_convertvcf
-        |  map{tumor,vcf,vcfindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} 
+    octopus_in_sc = octopus_in | octopus_convertvcf 
+        |  map{tumor,normal,vcf,vcfindex ->tuple(tumor,normal,"octopus",vcf,vcfindex)} 
 
     //Octopus TOnly
     octopus_in_tonly=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed->
@@ -327,10 +327,10 @@ workflow VC {
         map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"octopus_tonly",normvcf,normindex)}
     annotvep_tonly_octopus(octopus_in_tonly)
     octopus_in_tonly_sc=octopus_in_tonly | octopus_convertvcf_tonly
-        | map{tumor,vcf,vcfindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} 
+        | map{tumor,vcf,vcfindex ->tuple(tumor,"octopus_tonly",vcf,vcfindex)} 
 
     //Combine All Variants Using VCF and Then Reannotate
-    mutect2_in|concat(strelka_in)|concat(octopus_in_sc)|concat(muse_in)|concat(lofreq_in)
+    mutect2_in|concat(strelka_in) | concat(octopus_in_sc) | concat(muse_in) | concat(lofreq_in)
         | concat(vardict_in) |concat(varscan_in) | groupTuple(by:[0,1])
         | somaticcombine
         | map{tumor,normal,vcf,index ->tuple(tumor,normal,"combined",vcf,index)}

From 5df8e6206d631767c712c419a786fe3b53e8283b Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Tue, 23 Jan 2024 16:43:39 -0500
Subject: [PATCH 47/58] fix: rename output

---
 subworkflows/local/workflows.nf | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf
index 883f4ca..0b509ef 100644
--- a/subworkflows/local/workflows.nf
+++ b/subworkflows/local/workflows.nf
@@ -285,9 +285,9 @@ workflow VC {
     //VarScan TOnly
     varscan_in_tonly=bambyinterval.combine(contamination_paired.out)
     | map{tumor,bam,bai,normal,nbam,nbai,bed,tumorname2,tpile,npile,tumorc,normalc ->
-            tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly  | groupTuple()
-    | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf/)[0][1].toInteger()},"varscan_tonly")}
-    | combineVariants_varscan_tonly
+            tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly  | groupTuple 
+    | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf.gz/)[0][1].toInteger()},"varscan_tonly")} 
+    | combineVariants_varscan_tonly 
     | join(sample_sheet)
     | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"varscan_tonly",normvcf,normindex)}
     annotvep_tonly_varscan(varscan_in_tonly)

From d01739fa33efbb24900afc26476822e54ddabdb1 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Wed, 24 Jan 2024 16:49:34 -0500
Subject: [PATCH 48/58] fix: fixed combine mode

---
 modules/local/variant_calling.nf |  1 -
 subworkflows/local/workflows.nf  | 12 ++++++------
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf
index 03b17b7..e0b44c7 100644
--- a/modules/local/variant_calling.nf
+++ b/modules/local/variant_calling.nf
@@ -411,7 +411,6 @@ process varscan_tn {
     input:
         tuple val(tumorname), path(tumor), path(tumorbai),
         val(normalname), path(normal), path(normalbai), path(bed),
-        val(tumor1),
         path(tumorpileup), path(normalpileup),
         path(tumor_con_table), path(normal_con_table)
 
diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf
index 0b509ef..6c4f201 100644
--- a/subworkflows/local/workflows.nf
+++ b/subworkflows/local/workflows.nf
@@ -275,18 +275,19 @@ workflow VC {
     annotvep_tonly_vardict(vardict_in_tonly)
 
     //VarScan TN
-    varscan_in=bambyinterval.combine(contamination_paired.out)
-        | varscan_tn | groupTuple(by:[0,1])
+    varscan_in=bambyinterval.combine(contamination_paired.out,by:0) 
+        | varscan_tn | groupTuple(by:[0,1]) 
         | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).varscan.vcf.gz/)[0][1].toInteger()},"varscan")}
         | combineVariants_varscan | join(sample_sheet_paired)
         | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"varscan",normvcf,normindex)}
     annotvep_tn_varscan(varscan_in)
 
+    
     //VarScan TOnly
-    varscan_in_tonly=bambyinterval.combine(contamination_paired.out)
-    | map{tumor,bam,bai,normal,nbam,nbai,bed,tumorname2,tpile,npile,tumorc,normalc ->
+    varscan_in_tonly=bambyinterval.combine(contamination_paired.out,by:0) 
+    | map{tumor,bam,bai,normal,nbam,nbai,bed,tpile,npile,tumorc,normalc ->
             tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly  | groupTuple 
-    | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf.gz/)[0][1].toInteger()},"varscan_tonly")} 
+    | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf.gz/)[0][1].toInteger()},"varscan_tonly")}  
     | combineVariants_varscan_tonly 
     | join(sample_sheet)
     | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"varscan_tonly",normvcf,normindex)}
@@ -347,7 +348,6 @@ workflow VC {
     emit:
         somaticcall_input=octopus_in
 
-
 }
 
 

From 220ebb283e75c58dfa641b8e11765b98c7bb58ac Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Mon, 12 Feb 2024 19:59:48 -0500
Subject: [PATCH 49/58] feat: added lofreq separate container for htslib update

---
 conf/containers.config           | 2 +-
 modules/local/variant_calling.nf | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/conf/containers.config b/conf/containers.config
index c929fec..c47abbb 100644
--- a/conf/containers.config
+++ b/conf/containers.config
@@ -4,7 +4,7 @@ params {
         base = 'nciccbr/ccbr_ubuntu_base_20.04:v6.1'
         logan = 'docker://dnousome/ccbr_logan_base:v0.3.4'
         vcf2maf = 'docker://dnousome/ccbr_vcf2maf:v102.0.0'
+        lofreq = 'docker://dnousome/ccbr_lofreq:v0.0.1'
         octopus = 'docker://dancooke/octopus:latest'
-
     }
 }
diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf
index e0b44c7..952cf95 100644
--- a/modules/local/variant_calling.nf
+++ b/modules/local/variant_calling.nf
@@ -488,7 +488,7 @@ process octopus_tn {
 
 
 process lofreq_tn {
-    container "${params.containers.logan}"
+    container "${params.containers.lofreq}"
     label 'process_somaticcaller'
 
     input:

From 5efe9af9811cdd6bcb564dc7a4c4cae9ba2fe109 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Tue, 5 Mar 2024 14:09:05 -0500
Subject: [PATCH 50/58] fix: increase memory for vc

---
 conf/base.config            |  4 ++--
 modules/local/qc.nf         | 27 ++++++++-------------------
 modules/local/trim_align.nf |  5 -----
 3 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index 0eb1dee..0b5fd0c 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -58,8 +58,8 @@ process {
     }
     withLabel:process_somaticcaller_high {
         cpus   = { check_max( 16     * task.attempt, 'cpus'    ) }
-        memory = { check_max( 70.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 48.h   * task.attempt, 'time'    ) }
+        memory = { check_max( 96.GB * task.attempt, 'memory'  ) }
+        time   = { check_max( 72.h   * task.attempt, 'time'    ) }
     }
     withLabel:process_highmem {
         cpus   = { check_max( 4     * task.attempt, 'cpus'    ) }
diff --git a/modules/local/qc.nf b/modules/local/qc.nf
index f8bbc89..7732405 100644
--- a/modules/local/qc.nf
+++ b/modules/local/qc.nf
@@ -146,18 +146,13 @@ process fastqc {
     @Output:
         FastQC report and zip file containing sequencing quality information
     """
-
-
     input:
         tuple val(samplename), path("${samplename}.bqsr.bam"), path("${samplename}.bqsr.bai")
     output:
         tuple val(samplename), path("${samplename}_fastqc.html"), path("${samplename}_fastqc.zip")
 
-    //message: "Running FastQC with {threads} threads on '{input}' input file"
-    //threads: 8
-    //module=['fastqc/0.11.9']
-
     script:
+
     """
     mkdir -p fastqc
     fastqc -t 8 \
@@ -404,14 +399,6 @@ process gatk_varianteval {
         tuple val(samplename), path("${samplename}.gvcf.gz") ,path("${samplename}.gvcf.gz.tbi")
     output:
         path("${samplename}.germline.eval.grp")
-    //params:
-     //   rname    = "vareval",
-      //  genome   = config['references']['GENOME'],
-       // dbsnp    = config['references']['DBSNP'],
-      //  ver_gatk = config['tools']['gatk4']['version']
-    //message: "Running GATK4 VariantEval on '{input.vcf}' input file"
-    //container: config['images']['wes_base']
-    //threads: 16
     script:
     """
     gatk --java-options '-Xmx12g -XX:ParallelGCThreads=16' VariantEval \
@@ -475,6 +462,12 @@ process somalier_extract {
         Mapped and pre-processed BAM file
     @Output:
         Exracted sites in (binary) somalier format
+    
+    params:
+        sites_vcf = config['references']['SOMALIER']['SITES_VCF'],
+        genomeFasta = config['references']['GENOME'],
+        rname = 'somalier_extract'
+    container: config['images']['wes_base']
     */
     label 'process_low'
 
@@ -482,11 +475,7 @@ process somalier_extract {
         tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai")
     output:
         path("output/${samplename}.somalier")
-    //params:
-    //    sites_vcf = config['references']['SOMALIER']['SITES_VCF'],
-    //    genomeFasta = config['references']['GENOME'],
-    //    rname = 'somalier_extract'
-    //container: config['images']['wes_base']
+
     script:
     """
     mkdir -p output
diff --git a/modules/local/trim_align.nf b/modules/local/trim_align.nf
index 1ceb063..bcab724 100644
--- a/modules/local/trim_align.nf
+++ b/modules/local/trim_align.nf
@@ -209,11 +209,6 @@ process bamtocram_tonly {
 
 /*
 process indelrealign {
-    //Briefly, RealignerTargetCreator runs faster with increasing -nt threads,
-    //while IndelRealigner shows diminishing returns for increasing scatter
-
-    tag { name }
-
     input:
     tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai")
 

From 2caff97589c2e2c0195fd7d6067b52ee136df5fd Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Wed, 6 Mar 2024 10:09:41 -0500
Subject: [PATCH 51/58] fix: add the set column for vcf2maf

---
 modules/local/variant_calling.nf       | 2 +-
 modules/local/variant_calling_tonly.nf | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf
index 952cf95..3b1386f 100644
--- a/modules/local/variant_calling.nf
+++ b/modules/local/variant_calling.nf
@@ -460,7 +460,6 @@ process octopus_tn {
         tuple val(tumorname), path(tumor), path(tumorbai),
         val(normalname), path(normal), path(normalbai), path(bed)
 
-
     output:
         tuple val("${tumorname}_vs_${normalname}"),
         path("${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz")
@@ -893,6 +892,7 @@ process annotvep_tn {
     --vep-path /opt/vep/src/ensembl-vep \
     --vep-data !{VEPCACHEDIR} \
     --ncbi-build !{VEPBUILD} --species !{VEPSPECIES} --ref-fasta !{GENOMEREF} \
+    --retain-info "set" \
     --vep-overwrite
 
     '''
diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf
index 3ad791c..e36622d 100644
--- a/modules/local/variant_calling_tonly.nf
+++ b/modules/local/variant_calling_tonly.nf
@@ -503,6 +503,7 @@ process annotvep_tonly {
     --vep-path /opt/vep/src/ensembl-vep \
     --vep-data !{VEPCACHEDIR} \
     --ncbi-build !{VEPBUILD} --species !{VEPSPECIES} --ref-fasta !{GENOMEREF} \
+    --retain-info "set" \
     --vep-overwrite
 
 

From 98d7cb025a1cf0ff85abf208496a06cfe401851a Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Wed, 6 Mar 2024 10:18:02 -0500
Subject: [PATCH 52/58] fix: stub tests

---
 .github/workflows/tests.yaml | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index f6a9ea6..11e91d1 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -14,18 +14,22 @@ jobs:
     - uses: actions/checkout@v2
     - uses: docker://nextflow/nextflow:22.10.8
     - uses: actions/setup-python@v4
-      with:
-        python-version: '3.9' 
     - name: Tumor-normal FastQ Stub Run
       run: |
         docker run -v $PWD:/opt2 nextflow/nextflow:22.10.8 \
-        python /opt2/wgs-seek --fastq "/opt2/.tests/*R{1,2}.fastq.gz" \
-        --mode vc \
-        --output /opt2/output_tn_fqs \
-        --paired --sample_sheet "/opt2/.tests/pairs.tsv" --stub
+        nextflow run --fastq_input "/opt2/.tests/*R{1,2}.fastq.gz" \
+        -profile ci_stub \
+        --vc --cnv --sv \
+        --genome hg38 \
+        --outdir /opt2/output_tn_fqs \
+        --sample_sheet "/opt2/.tests/pairs.tsv" \
+        --stub
     - name: Tumor-only FastQ Stub Run
       run: |
         docker run -v $PWD:/opt2 nextflow/nextflow:22.10.8 \
-        python /opt2/wgs-seek --fastq "/opt2/.tests/*R{1,2}.fastq.gz" \
-        --mode vc \
-        --output "/opt2/output_tonly_fqs" --stub
\ No newline at end of file
+        nextflow run --fastq_input "/opt2/.tests/*R{1,2}.fastq.gz" \
+        -profile ci_stub \
+        --vc --cnv --sv \
+        --genome hg38 \
+        --outdir /opt2/output_tn_fqs \
+        --stub

From 0cac32fdb3c572e6185a08d0504f5e553790e7cb Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Wed, 6 Mar 2024 10:18:46 -0500
Subject: [PATCH 53/58] fix: action changes

---
 .github/workflows/tests.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 11e91d1..7cff41e 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -13,7 +13,6 @@ jobs:
     steps:
     - uses: actions/checkout@v2
     - uses: docker://nextflow/nextflow:22.10.8
-    - uses: actions/setup-python@v4
     - name: Tumor-normal FastQ Stub Run
       run: |
         docker run -v $PWD:/opt2 nextflow/nextflow:22.10.8 \

From 11894af7df58e4a7b47f82a5937c5d5193bed6b4 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Wed, 6 Mar 2024 10:34:05 -0500
Subject: [PATCH 54/58] docs: citation add

---
 CITATION.cff | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CITATION.cff b/CITATION.cff
index e52b1c9..817be73 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -3,6 +3,7 @@ message: "Please cite LOGAN as below." # TODO set up Zenodo to archive your tool
 authors: # TODO: author names should match those in pyproject.toml
   - family-names: Nousome
     given-names: Darryl
+    orcid: https://orcid.org/0000-0002-5259-8599
   - family-names: Sovacool
     given-names: Kelly
     orcid: https://orcid.org/0000-0003-3283-829X

From e1d1356e45abb0332d617f6435e488f722eaaed0 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Wed, 6 Mar 2024 10:53:07 -0500
Subject: [PATCH 55/58] fix: simplify testing

---
 .github/workflows/build.yml  |  9 ++++++++-
 .github/workflows/tests.yaml | 34 ----------------------------------
 .tests/interval.bed          |  0
 3 files changed, 8 insertions(+), 35 deletions(-)
 delete mode 100644 .github/workflows/tests.yaml
 create mode 100644 .tests/interval.bed

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index e91bde8..e6e4582 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -36,4 +36,11 @@ jobs:
           mkdir tmp && cd tmp
           which logan
           logan init
-          logan run -profile ci_stub,docker -stub
+          logan run -profile ci_stub,docker \
+          --fastq_input "/opt2/.tests/*R{1,2}.fastq.gz" \
+          --vc --cnv --sv \
+          --genome hg38 \
+          --outdir /opt2/output_tn_fqs \
+          --interval /opt2/.tests/interval.bed \
+          -stub
+
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
deleted file mode 100644
index 7cff41e..0000000
--- a/.github/workflows/tests.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-name: tests
-
-on:
-  push:
-    branches:
-      - main
-  pull_request:
-    branches_ignore: []
-
-jobs:
-  Stub_Run:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v2
-    - uses: docker://nextflow/nextflow:22.10.8
-    - name: Tumor-normal FastQ Stub Run
-      run: |
-        docker run -v $PWD:/opt2 nextflow/nextflow:22.10.8 \
-        nextflow run --fastq_input "/opt2/.tests/*R{1,2}.fastq.gz" \
-        -profile ci_stub \
-        --vc --cnv --sv \
-        --genome hg38 \
-        --outdir /opt2/output_tn_fqs \
-        --sample_sheet "/opt2/.tests/pairs.tsv" \
-        --stub
-    - name: Tumor-only FastQ Stub Run
-      run: |
-        docker run -v $PWD:/opt2 nextflow/nextflow:22.10.8 \
-        nextflow run --fastq_input "/opt2/.tests/*R{1,2}.fastq.gz" \
-        -profile ci_stub \
-        --vc --cnv --sv \
-        --genome hg38 \
-        --outdir /opt2/output_tn_fqs \
-        --stub
diff --git a/.tests/interval.bed b/.tests/interval.bed
new file mode 100644
index 0000000..e69de29

From 862f1b6dd675113e94d12af8b4d3653f10c658bb Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Wed, 6 Mar 2024 12:13:37 -0500
Subject: [PATCH 56/58] docs: changelog

---
 CHANGELOG.md                |  5 ++++-
 modules/local/copynumber.nf | 18 +++++++++---------
 modules/local/qc.nf         |  3 ---
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f8ecdbb..4fa28d1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
 # LOGAN development version
 
-- Python-based CLI
\ No newline at end of file
+- Changed over to Nextflow CCBR template and pip packaging
+    - Processes moved to `modules/local` directory
+    - Workflows under the `subworkflows/local` directory
+    - Processes fall under the low/med/high labels (e.g. `process_low`), plus an added somatic variant caller process
diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf
index 0f4a92b..ef47cc7 100644
--- a/modules/local/copynumber.nf
+++ b/modules/local/copynumber.nf
@@ -3,15 +3,15 @@ SEQUENZAGC=file(params.genomes[params.genome].SEQUENZAGC)
 SEQUENZA_SCRIPT=params.script_sequenza
 
 if (params.genome=="mm10"){
-FREECLENGTHS=file(params.genomes[params.genome].FREEC.FREECLENGTHS)
-FREECCHROMS=file(params.genomes[params.genome].FREEC.FREECCHROMS)
-FREECPILEUP=file(params.genomes[params.genome].FREEC.FREECPILEUP)
-FREECSNPS = file(params.genomes[params.genome].FREEC.FREECSNPS)
-FREECTARGETS=file(params.genomes[params.genome].intervals)
-FREECSCRIPT = params.script_freec
-FREECPAIR_SCRIPT = params.script_freecpaired
-FREECSIGNIFICANCE = params.freec_significance
-FREECPLOT = params.freec_plot
+    FREECLENGTHS=file(params.genomes[params.genome].FREEC.FREECLENGTHS)
+    FREECCHROMS=file(params.genomes[params.genome].FREEC.FREECCHROMS)
+    FREECPILEUP=file(params.genomes[params.genome].FREEC.FREECPILEUP)
+    FREECSNPS = file(params.genomes[params.genome].FREEC.FREECSNPS)
+    FREECTARGETS=file(params.genomes[params.genome].intervals)
+    FREECSCRIPT = params.script_freec
+    FREECPAIR_SCRIPT = params.script_freecpaired
+    FREECSIGNIFICANCE = params.freec_significance
+    FREECPLOT = params.freec_plot
 }
 
 GERMLINEHET="/data/SCLC-BRAINMETS/cn/copy_number/GermlineHetPon.38.vcf.gz"
diff --git a/modules/local/qc.nf b/modules/local/qc.nf
index 7732405..501fce6 100644
--- a/modules/local/qc.nf
+++ b/modules/local/qc.nf
@@ -18,7 +18,6 @@ SCRIPT_PATH_PCA = file(params.script_ancestry)
 //OUTPUT DIRECTORY
 process fc_lane {
     container = "${params.containers.logan}"
-
     label 'process_low'
 
     input:
@@ -46,8 +45,6 @@ process fc_lane {
 
 process fastq_screen {
     //Uses Trimmed Files
-
-
     input:
     tuple val(samplename),
         path("${samplename}.R1.trimmed.fastq.gz"),

From 4c530157f6bf275722ae1450369f9431bde31147 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Wed, 6 Mar 2024 14:59:34 -0500
Subject: [PATCH 57/58] refactor: delete python script

---
 logan | 314 ----------------------------------------------------------
 1 file changed, 314 deletions(-)
 delete mode 100755 logan

diff --git a/logan b/logan
deleted file mode 100755
index 9a72893..0000000
--- a/logan
+++ /dev/null
@@ -1,314 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: UTF-8 -*-
-
-"""
-ABOUT: This is the main entry for the LOGAN (whole genome sequencing pipeline).
-REQUIRES:
-  - python>=3.5
-  - nextflow
-  - singularity
-DISCLAIMER:
-                    PUBLIC DOMAIN NOTICE
-        CCR Collaborative Bioinformatics Resource (CCBR)
-                National Cancer Institute (NCI)
-This software/database is a "United  States Government Work" under
-the terms of the United  States Copyright Act.  It was written as
-part of the author's official duties as a United States Government
-employee and thus cannot be copyrighted. This software is freely
-available to the public for use.
-Although all  reasonable  efforts have been taken  to ensure  the
-accuracy and reliability of the software and data, CCBR do not and
-cannot warrant the performance or results that may  be obtained by
-using this software or data. CCBR and NCI disclaim all warranties,
-express  or  implied,  including   warranties   of   performance,
-merchantability or fitness for any particular purpose.
-Please cite the author and the "NIH Biowulf Cluster" in any work or
-product based on this material.
-
-
-    PIPELINE TYPE
-    Align --PIPE_ALIGN-TRIM ALIGN
-    Variant Calls--PIPE_VC-Variant calling step after align
-    Germline Calls DV--PIPE_GERMLINE-Germline after align
-    QC requires Alignment, Germline--PIPE_QC--After everything
-    --PIPE_BAMVC-BAM variant calling only
-    --PIPE_TONLY_TRIM-Trim and Align
-    --PIPE_TONLY_TRIM-Trim and Align
-"""
-
-# Python standard library
-import argparse, os, time, sys, subprocess, re, json
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(description="Input files")
-    parser.add_argument("--fastq", help="FQ Inputs")
-    parser.add_argument(
-        "--file_input",
-        help="TSV file of all fastq files used for input with 3 Columns Sample Name, Pair1, Pair2",
-    )
-    parser.add_argument("--bam", help="Glob of all the BAM files []")
-    parser.add_argument("--sample_sheet", help="Sample sheet and required for Paired")
-    parser.add_argument("--splitregions", default=24, help="How splits per regions")
-    parser.add_argument("--vc", help="Add Somatic VC calling", action="store_true")
-    parser.add_argument("--cnv", help="Add CNV calling", action="store_true")
-    parser.add_argument(
-        "--sv", help="Add Structural Variant calling", action="store_true"
-    )
-    parser.add_argument("--germline", help="Add Germline VC", action="store_true")
-    parser.add_argument(
-        "--qc",
-        help="Add QC Steps (Requires Germline Calling as well)",
-        action="store_true",
-    )
-    parser.add_argument("--output", help="Output Directory")
-    parser.add_argument("--genome", help="hg38, mm10")
-    parser.add_argument("--profile", help="Biowulf or Local Run")
-    parser.add_argument(
-        "--resume", action="store_true", default="True", help="Resume previous run?"
-    )
-    parser.add_argument("--submit", action="store_true", help="Submit to SLURM?")
-    parser.add_argument("--stub", action="store_true", help="Stub run")
-    args = parser.parse_args()
-    return args
-
-
-def main():
-    args = parse_args()
-    dirname = os.path.dirname(os.path.realpath(__file__))
-    outdirname = os.path.basename(os.getcwd())
-    c1 = "#!/usr/bin/bash"
-    c2 = "module load nextflow"
-    c3 = "module load singularity"
-    # Paired Mode-> either align/VC/SV/CNV/germline(QC as well) with FASTQ
-    if args.sample_sheet:
-        sample_path = "--sample_sheet '" + args.sample_sheet + "'"
-        ##Input Section
-        if args.fastq:
-            in1 = "--fastq_input '" + args.fastq + "'"
-        elif args.file_input:
-            in1 = "--file_input " + args.file_input
-        elif args.bam:
-            in1 = "--bam_input '" + args.bam + "'"
-            baminput = True
-        else:
-            print(
-                "Missing sample sheet for paired mode or you would like Tumor only mode?"
-            )
-        alignmode = "--PIPE_ALIGN"
-        if args.vc and args.bam:
-            vcmode = "--PIPE_BAMVC"
-        elif args.vc:
-            vcmode = "--PIPE_VC"
-        if args.sv and args.bam:
-            svmode = "--PIPE_BAMSV"
-        elif args.sv:
-            svmode = "--PIPE_SV"
-        if args.cnv and args.bam:
-            cnvmode = "--PIPE_BAMCNV"
-        elif args.cnv:
-            cnvmode = "--PIPE_CNV"
-        if args.germline and args.bam:
-            germmode = "--PIPE_BAMGERMLINE"
-        elif args.germline:
-            germmode = "--PIPE_GERMLINE"
-        if args.qc and args.germline:
-            qcmode = "--PIPE_QC_GL"
-        elif args.qc:
-            qcmode = "--PIPE_QC_NOGL"
-    else:
-        ##SET DEFAULT for Tumor-Only Modes//Tumor Only Mode (No sample sheet)
-        alignmode = "--PIPE_TONLY_ALIGN"
-        qcmode = "--PIPE_TONLY_QC"
-        if (
-            args.file_input and re.search(r".bam", open(args.file_input, "r").read())
-        ) or args.bam:
-            baminput = True
-        sample_path = ""
-        if args.vc:
-            if args.fastq:
-                vcmode = "--PIPE_TONLY_VC"
-                in1 = "--fastq_input '" + args.fastq + "'"
-            elif args.bam:
-                vcmode = "--PIPE_TONLY_BAMVC"
-                in1 = "--bam_input '" + args.bam + "'"
-            elif args.file_input:
-                in1 = "--file_input " + args.file_input
-                bamin = re.search(r".bam", open(args.file_input, "r").read())
-                if bamin:
-                    vcmode = "--PIPE_TONLY_BAMVC"
-                else:
-                    vcmode = "--PIPE_TONLY_VC"
-        if args.sv:
-            if args.fastq:
-                svmode = "--PIPE_TONLY_SV"
-                in1 = "--fastq_input '" + args.fastq + "'"
-            elif args.bam:
-                svmode = "--PIPE_TONLY_BAMSV"
-                in1 = "--bam_input '" + args.bam + "'"
-            elif args.file_input:
-                in1 = "--file_input " + args.file_input
-                bamin = re.search(r".bam", open(args.file_input, "r").read())
-                if bamin:
-                    svmode = "--PIPE_TONLY_BAMSV"
-                else:
-                    svmode = "--PIPE_TONLY_SV"
-        if args.cnv:
-            if args.fastq:
-                cnvmode = "--PIPE_TONLY_CNV"
-                in1 = "--fastq_input '" + args.fastq + "'"
-            elif args.bam:
-                cnvmode = "--PIPE_TONLY_BAMCNV"
-                in1 = "--bam_input '" + args.bam + "'"
-            elif args.file_input:
-                in1 = "--file_input " + args.file_input
-                bamin = re.search(r".bam", open(args.file_input, "r").read())
-                if bamin:
-                    cnvmode = "--PIPE_TONLY_BAMCNV"
-                else:
-                    cnvmode = "--PIPE_TONLY_CNV"
-        if args.qc:
-            if args.fastq:
-                in1 = "--fastq_input '" + args.fastq + "'"
-            elif args.file_input:
-                in1 = "--file_input " + args.file_input
-    if args.stub and args.profile is None:
-        profile = "-profile localstub"
-        splitreg = "4"
-    elif args.profile == "local":
-        profile = "-profile local"
-        splitreg = str(args.splitregions)
-    elif args.profile == "biowulf" or args.profile is None:
-        profile = "-profile biowulf"
-        splitreg = str(args.splitregions)
-    if args.resume:
-        resume = "-resume"
-    else:
-        resume = ""
-    ###COMBINE ALL COMMANDS (PIPE ALIGN)
-    commandbase = [
-        "nextflow run",
-        dirname + "/main.nf",
-        "-c " + dirname + "/nextflow.config",
-        in1,
-        profile,
-        resume,
-        sample_path,
-        "--genome",
-        args.genome,
-        "--output '" + args.output + "'" + " --split_regions " + splitreg,
-    ]
-    ##FINAL COMMANDS
-    if not "baminput" in locals():
-        commandalign = commandbase + [alignmode]
-        cmd1 = " ".join(commandalign)
-    else:
-        cmd1 = ""
-    if args.vc:
-        commandvc = commandbase + [vcmode]
-        cmd2 = " ".join(commandvc)
-    else:
-        cmd2 = ""
-    if args.sv:
-        commandsv = commandbase + [svmode]
-        cmd3 = " ".join(commandsv)
-    else:
-        cmd3 = ""
-    if args.cnv:
-        commandcnv = commandbase + [cnvmode]
-        cmd4 = " ".join(commandcnv)
-    else:
-        cmd4 = ""
-    if args.germline:
-        commandgl = commandbase + [germmode]
-        cmd5 = " ".join(commandgl)
-    else:
-        cmd5 = ""
-    if args.qc:
-        commandqc = commandbase + [qcmode]
-        cmd6 = " ".join(commandqc)
-    else:
-        cmd6 = ""
-    code = (
-        c1
-        + "\n"
-        + c2
-        + "\n"
-        + c3
-        + "\n"
-        + cmd1
-        + "\n"
-        + cmd2
-        + "\n"
-        + cmd3
-        + "\n"
-        + cmd4
-        + "\n"
-        + cmd5
-        + "\n"
-        + cmd6
-    )
-    time1 = time.strftime("%Y_%m_%d_%H%M")
-    stubbase = " -stub -without-podman T -without-conda -without-docker"
-    if args.stub:
-        if not "baminput" in locals():
-            cmd1_stub = cmd1 + stubbase
-        else:
-            cmd1_stub = ""
-        if args.vc:
-            cmd2_stub = cmd2 + stubbase
-        else:
-            cmd2_stub = ""
-        if args.sv:
-            cmd3_stub = cmd3 + stubbase
-        else:
-            cmd3_stub = ""
-        if args.cnv:
-            cmd4_stub = cmd4 + stubbase
-        else:
-            cmd4_stub = ""
-        if args.germline:
-            cmd5_stub = cmd5 + stubbase
-        else:
-            cmd5_stub = ""
-        if args.qc:
-            cmd6_stub = cmd6 + stubbase
-        else:
-            cmd6_stub = ""
-        cmd_stub = (
-            cmd1_stub
-            + "\n"
-            + cmd2_stub
-            + "\n"
-            + cmd3_stub
-            + "\n"
-            + cmd4_stub
-            + "\n"
-            + cmd5_stub
-            + "\n"
-            + cmd6_stub
-        )
-        print(cmd_stub)
-        os.system(cmd_stub)
-    else:
-        outswarmmut = args.output + "_" + time1 + ".slurm"
-        with open(outswarmmut, "a") as outfile:
-            outfile.write(code + "\n")
-        sbatch_mut = (
-            "sbatch --cpus-per-task=2 --mem=8g --time 10-00:00:00 --partition norm --output submit_"
-            + time1
-            + ".log --error error_"
-            + time1
-            + ".log --mail-type=BEGIN,END "
-            + outswarmmut
-        )
-        sbatch_out = "kickoff_" + time1 + ".sh"
-        with open(sbatch_out, "a") as outfile:
-            outfile.write(sbatch_mut + "\n")
-        print(sbatch_mut)
-        if args.submit:
-            os.system(sbatch_mut)
-
-
-if __name__ == "__main__":
-    main()

From 18a471a4778ee9d73f673a29db326de3d25230ba Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Thu, 7 Mar 2024 10:34:06 -0500
Subject: [PATCH 58/58] fix: change indel reference

---
 conf/genomes.config | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/conf/genomes.config b/conf/genomes.config
index 3d0843a..fde6bcc 100644
--- a/conf/genomes.config
+++ b/conf/genomes.config
@@ -9,10 +9,10 @@ params {
             intervals= "${projectDir}/assets/hg38_v0_wgs_calling_regions.hg38.bed"
             //millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
             //shapeitindel =  "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) //
-            KNOWNINDELS= '/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_indel.vcf'
+            KNOWNINDELS= '/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz'
             KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz'
             dbsnp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz"
-            dbsnp_indel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_indel.vcf"
+            dbsnp_indel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
             gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' // /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz
             pon = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PON/updatedpon.vcf.gz"    //pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon}
             kgp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/1000G_phase1.snps.high_confidence.hg38.vcf.gz"