From 5a439a96dad961496f84dba555147941091bb041 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Mon, 11 Dec 2023 09:51:34 -0500 Subject: [PATCH 01/58] fix: driver script added temporarily --- subworkflows/local/workflows.nf | 265 ++++++++++++++++---------------- wgs-seek | 152 ------------------ 2 files changed, 131 insertions(+), 286 deletions(-) delete mode 100755 wgs-seek diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index 436d677..96713b5 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -1,4 +1,4 @@ -//All Worksflows in One Place +//All Worksflows in One Place intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file') @@ -9,38 +9,38 @@ include {fc_lane; fastq_screen;kraken;qualimap_bamqc;fastqc; somalier_extract;somalier_analysis_human;somalier_analysis_mouse; multiqc} from '../../modules/local/qc.nf' -include {fastp; bwamem2; //indelrealign; +include {fastp; bwamem2; //indelrealign; bqsr; gatherbqsr; applybqsr; samtoolsindex} from '../../modules/local/trim_align.nf' -include {deepvariant_step1;deepvariant_step2;deepvariant_step3; +include {deepvariant_step1; deepvariant_step2; deepvariant_step3; deepvariant_combined;glnexus} from '../../modules/local/germline.nf' -include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n; +include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n; contamination_paired; learnreadorientationmodel;mergemut2stats; - strelka_tn; combineVariants_strelka; + strelka_tn; combineVariants_strelka; varscan_tn; vardict_tn; lofreq_tn; muse_tn; octopus_tn; bcftools_index_octopus; bcftools_index_octopus as bcftools_index_octopus_tonly; - combineVariants as combineVariants_vardict; combineVariants as combineVariants_vardict_tonly; + combineVariants as combineVariants_vardict; combineVariants as combineVariants_vardict_tonly; combineVariants as combineVariants_varscan; combineVariants as 
combineVariants_varscan_tonly; combineVariants_alternative as combineVariants_lofreq; combineVariants as combineVariants_muse; combineVariants_alternative as combineVariants_octopus; combineVariants_alternative as combineVariants_octopus_tonly; - annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; + annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict; annotvep_tn as annotvep_tn_octopus; annotvep_tn as annotvep_tn_lofreq; annotvep_tn as annotvep_tn_muse; annotvep_tn as annotvep_tn_combined; combinemafs_tn; somaticcombine} from '../../modules/local/variant_calling.nf' -include {mutect2_t_tonly; mutect2filter_tonly; +include {mutect2_t_tonly; mutect2filter_tonly; varscan_tonly; vardict_tonly; octopus_tonly; contamination_tumoronly; - learnreadorientationmodel_tonly; + learnreadorientationmodel_tonly; mergemut2stats_tonly; - annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; + annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus; annotvep_tonly as annotvep_tonly_combined; combinemafs_tonly;somaticcombine_tonly} from '../../modules/local/variant_calling_tonly.nf' -include {svaba_somatic; manta_somatic; +include {svaba_somatic; manta_somatic; survivor_sv; gunzip; annotsv_tn as annotsv_survivor_tn annotsv_tn as annotsv_svaba;annotsv_tn as annotsv_manta} from '../../modules/local/structural_variant.nf' @@ -53,17 +53,17 @@ include {splitinterval} from '../../modules/local/splitbed.nf' workflow INPUT { - + if(params.fastq_input){ fastqinput=Channel.fromFilePairs(params.fastq_input) }else if(params.file_input) { fastqinput=Channel.fromPath(params.file_input) .splitCsv(header: false, sep: "\t", strip:true) - .map{ sample,fq1,fq2 -> + .map{ sample,fq1,fq2 -> tuple(sample, tuple(file(fq1),file(fq2))) } } - + if(params.sample_sheet){ 
sample_sheet=Channel.fromPath(params.sample_sheet, checkIfExists: true).view() .ifEmpty { "sample sheet not found" } @@ -73,10 +73,9 @@ workflow INPUT { row.Normal ) } - }else{ + } else { sample_sheet=fastqinput.map{samplename,f1 -> tuple ( samplename)} - } emit: @@ -89,28 +88,28 @@ workflow ALIGN { take: fastqinput sample_sheet - main: + main: fastp(fastqinput) splitinterval(intervalbedin) - + bwamem2(fastp.out) //indelrealign(bwamem2.out) - //indelbambyinterval=indelrealign.out.combine(splitinterval.out.flatten()) + bqsrbambyinterval=bwamem2.out.combine(splitinterval.out.flatten()) bambyinterval=bwamem2.out.combine(splitinterval.out.flatten()) - - + + bqsr(bqsrbambyinterval) bqsrs=bqsr.out.groupTuple() - .map { samplename,beds -> tuple( samplename, + .map { samplename,beds -> tuple( samplename, beds.toSorted{ it -> (it.name =~ /${samplename}_(.*?).recal_data.grp/)[0][1].toInteger() } ) } gatherbqsr(bqsrs) tobqsr=bwamem2.out.combine(gatherbqsr.out,by:0) - applybqsr(tobqsr) + applybqsr(tobqsr) //sample_sheet.view() bamwithsample=applybqsr.out.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(applybqsr.out,by:0).map{it.swap(3,0)} @@ -132,40 +131,40 @@ workflow GL { take: bambyinterval main: - deepvariant_step1(bambyinterval) + deepvariant_step1(bambyinterval) deepvariant_1_sorted=deepvariant_step1.out.groupTuple() - .map { samplename,tfbeds,gvcfbed -> tuple( samplename, + .map { samplename,tfbeds,gvcfbed -> tuple( samplename, tfbeds.toSorted{ it -> (it.name =~ /${samplename}.tfrecord_(.*?).bed.gz/)[0][1].toInteger() } , gvcfbed.toSorted{ it -> (it.name =~ /${samplename}.gvcf.tfrecord_(.*?).bed.gz/)[0][1].toInteger() } ) } - deepvariant_step2(deepvariant_1_sorted) | deepvariant_step3 - glin=deepvariant_step3.out.map{samplename,vcf,vcf_tbi,gvcf,gvcf_tbi -> gvcf}.collect() + deepvariant_step2(deepvariant_1_sorted) | deepvariant_step3 + glin=deepvariant_step3.out.map{samplename,vcf,vcf_tbi,gvcf,gvcf_tbi -> gvcf}.collect() glnexus(glin) emit: glnexusout=glnexus.out 
bcfout=deepvariant_step3.out - + } - + workflow VC { take: //Input is the BAMby interval bamwithsample splitout sample_sheet - - main: + + main: //Create Pairing for TN (in case of dups) sample_sheet_paired=sample_sheet|map{tu,no -> tuple ("${tu}_vs_${no}",tu, no)} bambyinterval=bamwithsample.combine(splitout.flatten()) - //Paired Mutect2 + //Paired Mutect2 mutect2(bambyinterval) pileup_paired_t(bambyinterval) pileup_paired_n(bambyinterval) - + pileup_paired_tout=pileup_paired_t.out.groupTuple() .map{samplename,pileups-> tuple( samplename, pileups.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tumor.pileup.table/)[0][1].toInteger() } , @@ -177,153 +176,153 @@ workflow VC { pileup_paired_all=pileup_paired_tout.join(pileup_paired_nout) - contamination_paired(pileup_paired_all) + contamination_paired(pileup_paired_all) //Mutect2 TN mutect2.out.groupTuple(by:[0,1]) - | multiMap { tumor,normal,vcfs,f1r2,stats -> + | multiMap { tumor,normal,vcfs,f1r2,stats -> mut2out_lor: tuple("${tumor}_vs_${normal}", f1r2.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } ) mut2out_mstats: tuple( "${tumor}_vs_${normal}", stats.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).mut2.vcf.gz.stats/)[0][1].toInteger() }) allmut2tn: tuple( "${tumor}_vs_${normal}", vcfs.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).mut2.vcf.gz/)[0][1].toInteger() } ) - } + } | set{mut2out} - + learnreadorientationmodel(mut2out.mut2out_lor) mergemut2stats(mut2out.mut2out_mstats) - + mutect2_in=mut2out.allmut2tn | join(mergemut2stats.out) - | join(learnreadorientationmodel.out) - | map{t,vcf,stats,ro -> tuple(t.split('_vs_')[0],t.split('_vs_')[1],vcf,stats,ro)} - | join(contamination_paired.out) + | join(learnreadorientationmodel.out) + | map{t,vcf,stats,ro -> tuple(t.split('_vs_')[0],t.split('_vs_')[1],vcf,stats,ro)} + | join(contamination_paired.out) | mutect2filter - | join(sample_sheet_paired) - | 
map{sample,markedvcf,markedindex,normvcf,normindex,stats,tumor,normal -> tuple(tumor,normal,"mutect2",normvcf,normindex)} + | join(sample_sheet_paired) + | map{sample,markedvcf,markedindex,normvcf,normindex,stats,tumor,normal -> tuple(tumor,normal,"mutect2",normvcf,normindex)} annotvep_tn_mut2(mutect2_in) //Mutect2 Tumor Only bambyinterval_t=bambyinterval.map{tumorname,tumor,tumorbai,normalname,normalbam,normalbai,bed ->tuple(tumorname,tumor,tumorbai,bed)} - mutect2_t_tonly(bambyinterval_t) - + mutect2_t_tonly(bambyinterval_t) + mutect2_t_tonly.out.groupTuple() - | multiMap { tumor,vcfs,f1r2,stats -> + | multiMap { tumor,vcfs,f1r2,stats -> mut2tout_lor: tuple(tumor, f1r2.toSorted{ it -> (it.name =~ /${tumor}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } ) mut2tonly_mstats: tuple( tumor, stats.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz.stats/)[0][1].toInteger() }) allmut2tonly: tuple(tumor, vcfs.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz/)[0][1].toInteger() } ) - } + } | set{mut2tonlyout} - + learnreadorientationmodel_tonly(mut2tonlyout.mut2tout_lor) mergemut2stats_tonly(mut2tonlyout.mut2tonly_mstats) contamination_tumoronly(pileup_paired_tout) - + mutect2_in_tonly=mut2tonlyout.allmut2tonly | join(mergemut2stats_tonly.out) | join(learnreadorientationmodel_tonly.out) - | join(contamination_tumoronly.out) + | join(contamination_tumoronly.out) | mutect2filter_tonly | join(sample_sheet) - | map{tumor,markedvcf,markedindex,normvcf,normindex,stats,normal -> tuple(tumor,"mutect2_tonly",normvcf,normindex)} + | map{tumor,markedvcf,markedindex,normvcf,normindex,stats,normal -> tuple(tumor,"mutect2_tonly",normvcf,normindex)} annotvep_tonly_mut2(mutect2_in_tonly) - - //Strelka TN + + //Strelka TN strelka_in=strelka_tn(bambyinterval) | groupTuple(by:[0,1]) | map { tumor,normal,vcfs,vcfindex,indels,indelindex -> tuple("${tumor}_vs_${normal}", vcfs.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.snvs.vcf.gz/)[0][1].toInteger() 
},vcfindex, - indels.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.indels.vcf.gz/)[0][1].toInteger() } ,indelindex)} + indels.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.indels.vcf.gz/)[0][1].toInteger() } ,indelindex)} | combineVariants_strelka | join(sample_sheet_paired) - | map{sample,markedvcf,markedindex,finalvcf,finalindex,tumor,normal -> tuple(tumor,normal,"strelka",finalvcf,finalindex)} + | map{sample,markedvcf,markedindex,finalvcf,finalindex,tumor,normal -> tuple(tumor,normal,"strelka",finalvcf,finalindex)} annotvep_tn_strelka(strelka_in) //Vardict TN vardict_in=vardict_tn(bambyinterval) | groupTuple(by:[0,1]) - | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).vardict.vcf/)[0][1].toInteger()},"vardict")} + | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).vardict.vcf/)[0][1].toInteger()},"vardict")} | combineVariants_vardict | join(sample_sheet_paired) - | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"vardict",normvcf,normindex)} + | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"vardict",normvcf,normindex)} annotvep_tn_vardict(vardict_in) //VarDict TOnly - vardict_in_tonly=bambyinterval + vardict_in_tonly=bambyinterval | map{tumorname,tumorbam,tumorbai,normname,normbam,normbai,bed -> - tuple(tumorname,tumorbam,tumorbai,bed)} + tuple(tumorname,tumorbam,tumorbai,bed)} | vardict_tonly | groupTuple() | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.vardict.vcf/)[0][1].toInteger()},"vardict_tonly")} | combineVariants_vardict_tonly | join(sample_sheet) - | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"vardict_tonly",normvcf,normindex)} + | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"vardict_tonly",normvcf,normindex)} 
annotvep_tonly_vardict(vardict_in_tonly) - + //VarScan TN - varscan_in=bambyinterval.combine(contamination_paired.out) - | varscan_tn | groupTuple(by:[0,1]) - | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).varscan.vcf.gz/)[0][1].toInteger()},"varscan")} + varscan_in=bambyinterval.combine(contamination_paired.out) + | varscan_tn | groupTuple(by:[0,1]) + | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).varscan.vcf.gz/)[0][1].toInteger()},"varscan")} | combineVariants_varscan | join(sample_sheet_paired) - | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"varscan",normvcf,normindex)} + | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"varscan",normvcf,normindex)} annotvep_tn_varscan(varscan_in) - + //VarScan TOnly - varscan_in_tonly=bambyinterval.combine(contamination_paired.out) + varscan_in_tonly=bambyinterval.combine(contamination_paired.out) | map{tumor,bam,bai,normal,nbam,nbai,bed,tumorname2,tpile,npile,tumorc,normalc -> - tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly | groupTuple() + tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly | groupTuple() | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf/)[0][1].toInteger()},"varscan_tonly")} | combineVariants_varscan_tonly | join(sample_sheet) - | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"varscan_tonly",normvcf,normindex)} + | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"varscan_tonly",normvcf,normindex)} annotvep_tonly_varscan(varscan_in_tonly) - + //Lofreq TN - lofreq_in=lofreq_tn(bambyinterval) | groupTuple(by:[0,1]) - | map{tu,no,snv,dbsnv,indel,dbindel,vcf,vcfindex-> tuple("${tu}_vs_${no}",vcf.toSorted{it -> (it.name =~ /${tu}_vs_${no}_(.*?)_lofreq.vcf.gz/)[0][1].toInteger()},vcfindex,"lofreq")} + 
lofreq_in=lofreq_tn(bambyinterval) | groupTuple(by:[0,1]) + | map{tu,no,snv,dbsnv,indel,dbindel,vcf,vcfindex-> tuple("${tu}_vs_${no}",vcf.toSorted{it -> (it.name =~ /${tu}_vs_${no}_(.*?)_lofreq.vcf.gz/)[0][1].toInteger()},vcfindex,"lofreq")} | combineVariants_lofreq | join(sample_sheet_paired) - | map{sample,marked,markedindex,normvcf,normindex,tumor,normal->tuple(tumor,normal,"lofreq",normvcf,normindex)} + | map{sample,marked,markedindex,normvcf,normindex,tumor,normal->tuple(tumor,normal,"lofreq",normvcf,normindex)} annotvep_tn_lofreq(lofreq_in) //MuSE TN - muse_in=muse_tn(bamwithsample) - | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf,"muse")} + muse_in=muse_tn(bamwithsample) + | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf,"muse")} | combineVariants_muse | join(sample_sheet_paired) - | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"muse",normvcf,normindex)} + | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"muse",normvcf,normindex)} annotvep_tn_muse(muse_in) //Octopus TN - octopus_in=octopus_tn(bambyinterval) | bcftools_index_octopus - | groupTuple() + octopus_in=octopus_tn(bambyinterval) | bcftools_index_octopus + | groupTuple() | map{samplename,vcf,vcfindex-> tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus")} - | combineVariants_octopus - | map{samplename,marked,markedindex,normvcf,normindex -> + | combineVariants_octopus + | map{samplename,marked,markedindex,normvcf,normindex -> tuple(samplename.split('_vs_')[0],samplename.split('_vs_')[1],"octopus",normvcf,normindex)} - annotvep_tn_octopus(octopus_in) + annotvep_tn_octopus(octopus_in) //Octopus TOnly octopus_in_tonly=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed-> - tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus_tonly - | groupTuple() + tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus_tonly + | 
groupTuple() | map{samplename,vcf,vcfindex->tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).tonly.octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus_tonly")} - | combineVariants_octopus_tonly + | combineVariants_octopus_tonly | join(sample_sheet) | - map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"octopus_tonly",normvcf,normindex)} + map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"octopus_tonly",normvcf,normindex)} annotvep_tonly_octopus(octopus_in_tonly) //Combine All Variants Using VCF and Then Reannotate mutect2_in|concat(strelka_in)|concat(octopus_in)|concat(muse_in)|concat(lofreq_in) | concat(vardict_in) |concat(varscan_in) | groupTuple(by:[0,1]) - | somaticcombine - | map{tumor,normal,vcf,index ->tuple(tumor,normal,"combined",vcf,index)} + | somaticcombine + | map{tumor,normal,vcf,index ->tuple(tumor,normal,"combined",vcf,index)} | annotvep_tn_combined mutect2_in_tonly|concat(octopus_in_tonly) - | concat(vardict_in_tonly)|concat(varscan_in_tonly) | groupTuple() - | somaticcombine_tonly - | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} + | concat(vardict_in_tonly)|concat(varscan_in_tonly) | groupTuple() + | somaticcombine_tonly + | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} | annotvep_tonly_combined - + //Implement PCGR Annotator/CivIC Next emit: @@ -336,18 +335,18 @@ workflow VC { workflow SV { take: bamwithsample - - main: + + main: //Svaba svaba_out=svaba_somatic(bamwithsample) .map{ tumor,bps,contigs,discord,alignents,gindel,gsv,so_indel,so_sv,unfil_gindel,unfil_gsv,unfil_so_indel,unfil_sv,log -> - tuple(tumor,so_sv,"svaba")} + tuple(tumor,so_sv,"svaba")} annotsv_svaba(svaba_out).ifEmpty("Empty SV input--No SV annotated") //Manta manta_out=manta_somatic(bamwithsample) - .map{tumor,gsv,so_sv,unfil_sv,unfil_indel -> - tuple(tumor,so_sv,"manta")} + .map{tumor,gsv,so_sv,unfil_sv,unfil_indel -> + tuple(tumor,so_sv,"manta")} annotsv_manta(manta_out).ifEmpty("Empty 
SV input--No SV annotated") //Delly-WIP @@ -361,20 +360,20 @@ workflow SV { workflow CNVmouse { take: bamwithsample - - main: + + main: //Sequenza (Preferred for Paired) chrs=Channel.fromList(params.genomes[params.genome].chromosomes) - seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> + seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} seqzin.combine(chrs) | seqz_sequenza_bychr - seqz_sequenza_bychr.out.groupTuple() + seqz_sequenza_bychr.out.groupTuple() .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} - | sequenza - + | sequenza + //FREEC Paired Mode bamwithsample | freec_paired - + } workflow CNVhuman { @@ -382,25 +381,25 @@ workflow CNVhuman { bamwithsample somaticcall_input - main: + main: //Sequenza chrs=Channel.fromList(params.genomes[params.genome].chromosomes) - seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> + seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} seqzin.combine(chrs) | seqz_sequenza_bychr - seqz_sequenza_bychr.out.groupTuple() + seqz_sequenza_bychr.out.groupTuple() .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} - | sequenza + | sequenza //Purple bamwithsample | amber_tn bamwithsample | cobalt_tn purplein=amber_tn.out.join(cobalt_tn.out) - purplein.join(somaticcall_input)| - map{t1,amber,cobalt,n1,vc,vcf,vcfindex -> tuple(t1,amber,cobalt,vcf,vcfindex)} + purplein.join(somaticcall_input)| + map{t1,amber,cobalt,n1,vc,vcf,vcfindex -> tuple(t1,amber,cobalt,vcf,vcfindex)} | purple - -} + +} /* //baminput=sample_sheet @@ -409,7 +408,7 @@ workflow CNVhuman { //somaticinput=sample_sheet // .map{samplename,bam,vcf-> tuple(samplename,file(vcf))} - + */ @@ -430,14 +429,14 @@ workflow QC_NOGL { qualimap_bamqc(applybqsr) samtools_flagstats(applybqsr) fastqc(applybqsr) - + //Somalier - somalier_extract(applybqsr) + somalier_extract(applybqsr) som_in=somalier_extract.out.collect() //Prep 
for MultiQC input fclane_out=fc_lane.out.map{samplename,info->info}.collect() - fqs_out=fastq_screen.out.collect() + fqs_out=fastq_screen.out.collect() kraken_out=kraken.out.map{samplename,taxa,krona -> tuple(taxa,krona)}.collect() qualimap_out=qualimap_bamqc.out.map{genome,rep->tuple(genome,rep)}.collect() @@ -445,11 +444,11 @@ workflow QC_NOGL { samtools_flagstats_out=samtools_flagstats.out.collect() - if(params.genome=="hg38"){ + if(params.genome=="hg38"){ somalier_analysis_human(som_in) somalier_analysis_out=somalier_analysis_human.out.collect() } - else if(params.genome=="mm10"){ + else if(params.genome=="mm10"){ somalier_analysis_mouse(som_in) somalier_analysis_out=somalier_analysis_mouse.out.collect() } @@ -466,8 +465,8 @@ workflow QC_GL { fastqin fastpout applybqsr - glnexusout - bcfout + glnexusout + bcfout main: //QC Steps @@ -487,23 +486,23 @@ workflow QC_GL { gatk_varianteval(bcfin) snpeff(bcfin) //Somalier - somalier_extract(applybqsr) + somalier_extract(applybqsr) som_in=somalier_extract.out.collect() //Prep for MultiQC input - if(params.genome=="hg38"){ + if(params.genome=="hg38"){ somalier_analysis_human(som_in) somalier_analysis_out=somalier_analysis_human.out.collect() } - else if(params.genome=="mm10"){ + else if(params.genome=="mm10"){ somalier_analysis_mouse(som_in) somalier_analysis_out=somalier_analysis_mouse.out.collect() } fclane_out=fc_lane.out.map{samplename,info->info}.collect() - fqs_out=fastq_screen.out.collect() + fqs_out=fastq_screen.out.collect() kraken_out=kraken.out.map{samplename,taxa,krona -> tuple(taxa,krona)}.collect() qualimap_out=qualimap_bamqc.out.map{genome,rep->tuple(genome,rep)}.collect() @@ -523,7 +522,7 @@ workflow QC_GL { //Variant Calling from BAM only workflow INPUT_BAM { - + if(params.sample_sheet){ sample_sheet=Channel.fromPath(params.sample_sheet, checkIfExists: true) .ifEmpty { "sample sheet not found" } @@ -533,9 +532,9 @@ workflow INPUT_BAM { row.Normal ) } - } - - //Either BAM Input or File sheet input + } + 
+ //Either BAM Input or File sheet input if(params.bam_input){ //Check if Index is .bai or .bam.bai bambai=params.bam_input +".bai" @@ -553,25 +552,23 @@ workflow INPUT_BAM { .map{it-> tuple(it.simpleName,it)} .join(bai) } - + }else if(params.file_input) { baminputonly=Channel.fromPath(params.file_input) .splitCsv(header: false, sep: "\t", strip:true) - .map{ sample,bam,bai -> + .map{ sample,bam,bai -> tuple(sample, file(bam),file(bai)) } } - + splitinterval(intervalbedin) - + bamwithsample=baminputonly.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(baminputonly,by:0).map{it.swap(3,0)} - + emit: bamwithsample splitout=splitinterval.out sample_sheet } - - diff --git a/wgs-seek b/wgs-seek deleted file mode 100755 index 14794db..0000000 --- a/wgs-seek +++ /dev/null @@ -1,152 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: UTF-8 -*- - -""" -ABOUT: This is the main entry for the LOGAN (whole genome sequencing pipeline). -REQUIRES: - - python>=3.5 - - nextflow - - singularity -DISCLAIMER: - PUBLIC DOMAIN NOTICE - CCR Collaborative Bioinformatics Resource (CCBR) - National Cancer Institute (NCI) -This software/database is a "United States Government Work" under -the terms of the United States Copyright Act. It was written as -part of the author's official duties as a United States Government -employee and thus cannot be copyrighted. This software is freely -available to the public for use. -Although all reasonable efforts have been taken to ensure the -accuracy and reliability of the software and data, CCBR do not and -cannot warrant the performance or results that may be obtained by -using this software or data. CCBR and NCI disclaim all warranties, -express or implied, including warranties of performance, -merchantability or fitness for any particular purpose. -Please cite the author and the "NIH Biowulf Cluster" in any work or -product based on this material. 
- - - PIPELINE TYPE - Align --PIPE_ALIGN-TRIM ALIGN - Variant Calls--PIPE_VC-Variant calling step after align - Germline Calls DV--PIPE_GERMLINE-Germline after align - QC requires Alignment, Germline--PIPE_QC--After everything - --PIPE_BAMVC-BAM variant calling only - --PIPE_TONLY_TRIM-Trim and Align - --PIPE_TONLY_TRIM-Trim and Align -""" - -import argparse, os, time, sys, subprocess, re, json - -def parse_args(): - parser = argparse.ArgumentParser(description='Input files') - parser.add_argument('--fastq',help='FQ Inputs') - parser.add_argument('--filelist',help="Files input") - parser.add_argument('--bam',help="Files input") - parser.add_argument('--mode',help='Mode?') - parser.add_argument('--paired',help='Paired',action="store_true") - parser.add_argument('--splitregions',default=24,help="How splits per regions") - parser.add_argument('--sv',help="Add Structural VC calling",action="store_true") - parser.add_argument('--output',help="Output Directory") - parser.add_argument('--sample_sheet',help="Sample sheet") - parser.add_argument('--profile',help="Biowulf or Local Run") - parser.add_argument('--resume',action="store_true",default="True",help="Resume previous run?") - parser.add_argument('--submit',action="store_true",help="Submit to SLURM?") - parser.add_argument('--stub',action="store_true",help="Stub run") - args = parser.parse_args() - return(args) - - - -def main(): - args=parse_args() - dirname = os.path.dirname(os.path.realpath(__file__)) - outdirname = os.path.basename(os.getcwd()) - c1="#!/usr/bin/bash" - c2="module load nextflow" - c3="module load singularity" - #Paired Mode-> either align/VC/germline with FASTQ - if args.paired and args.sample_sheet: - sample_path="--sample_sheet '"+args.sample_sheet+"'" - if args.mode=="align": - mode="--PIPE_ALIGN" - elif args.mode=="vc" and args.sv: - mode="--PIPE_SV" - elif args.mode=="vc": - mode="--PIPE_VC" - elif args.mode=="germline": - mode="--PIPE_GERMLINE" - elif args.mode=="qc": - mode="--PIPE_QC" - if 
args.fastq: - in1="--fastq_input '"+args.fastq+"'" - elif args.filelist: - in1="--file_input "+args.filelist - elif args.bam: - in1="--bam "+args.bam - else: - print("Missing sample sheet for paired mode or you would like Tumro only mode!") - else: - #Tumor Only- fastq only - if args.mode=="align": - mode="--PIPE_TONLY_ALIGN" - sample_path="" - if args.fastq: - in1="--fastq_input '"+args.fastq+"'" - elif args.filelist: - in1="--file_input "+args.filelist - if args.mode=="vc": - sample_path="" - if args.fastq: - mode="--PIPE_TONLY_VC" - in1="--fastq_input '"+args.fastq+"'" - elif args.bam: - mode="--PIPE_TONLY_BAMVC" - in1="--bam_input '"+args.bam+"'" - elif args.filelist: - mode="--PIPE_TONLY_BAMVC" - in1="--file_input "+args.filelist - if args.mode=="qc": - sample_path="" - if args.fastq: - mode="--PIPE_TONLY_QC" - in1="--fastq_input '"+args.fastq+"'" - elif args.bam: - mode="--PIPE_TONLY_QC" - in1="--bam_input '"+args.bam+"'" - if (args.stub and args.profile is None): - profile="-profile localstub" - elif args.profile=="local": - profile="-profile local" - elif (args.profile=="biowulf" or args.profile is None): - profile="-profile biowulf" - if args.resume: - resume="-resume" - else: - resume="" - c4=["nextflow run",dirname + '/wgs-seek.nf',"-c "+ dirname +"/nextflow.config", - in1,profile,resume,sample_path,mode, - "--output '" +args.output+"'"+" --split_regions " +str(args.splitregions)] - cmd1=' '.join(c4) - code=c1+"\n"+c2+"\n"+c3+"\n"+cmd1 - time1=time.strftime("%Y_%m_%d_%H%M%S") - #outswarmmut='wgs_nf_'+time1+'.slurm' - outswarmmut=args.output+"_"+time1+'.slurm' - with open(outswarmmut, "a") as outfile: - outfile.write(code+"\n") - sbatch_mut="sbatch --cpus-per-task=2 --mem=16g --time 10-00:00:00 --partition norm --output submit_"+time1+".log --error error_"+time1+".log --mail-type=BEGIN,TIME_LIMIT_90,END "+outswarmmut - if args.stub: - cmd2=cmd1+" --split_regions 4 -stub -without-podman T -without-conda -without-docker" - print(cmd2) - os.system(cmd2) - 
elif args.submit: - print(sbatch_mut) - os.system(sbatch_mut) - else: - sbatch_out='run_sbatch'+time1+'.sh' - with open(sbatch_out, "a") as outfile: - outfile.write(sbatch_mut+"\n") - print(sbatch_mut) -if __name__=="__main__": - main() - From d3a86f79ce993c652d916b0d8d2e1862814471c9 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Mon, 11 Dec 2023 10:03:27 -0500 Subject: [PATCH 02/58] feat: added temporary driver script --- logan | 314 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 314 insertions(+) create mode 100755 logan diff --git a/logan b/logan new file mode 100755 index 0000000..9a72893 --- /dev/null +++ b/logan @@ -0,0 +1,314 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- + +""" +ABOUT: This is the main entry for the LOGAN (whole genome sequencing pipeline). +REQUIRES: + - python>=3.5 + - nextflow + - singularity +DISCLAIMER: + PUBLIC DOMAIN NOTICE + CCR Collaborative Bioinformatics Resource (CCBR) + National Cancer Institute (NCI) +This software/database is a "United States Government Work" under +the terms of the United States Copyright Act. It was written as +part of the author's official duties as a United States Government +employee and thus cannot be copyrighted. This software is freely +available to the public for use. +Although all reasonable efforts have been taken to ensure the +accuracy and reliability of the software and data, CCBR do not and +cannot warrant the performance or results that may be obtained by +using this software or data. CCBR and NCI disclaim all warranties, +express or implied, including warranties of performance, +merchantability or fitness for any particular purpose. +Please cite the author and the "NIH Biowulf Cluster" in any work or +product based on this material. 
# NOTE: the lines below are the tail of the script's usage text, which begins
# before this section; they are kept here as comments.
#
#   PIPELINE TYPE
#   Align --PIPE_ALIGN-TRIM ALIGN
#   Variant Calls--PIPE_VC-Variant calling step after align
#   Germline Calls DV--PIPE_GERMLINE-Germline after align
#   QC requires Alignment, Germline--PIPE_QC--After everything
#   --PIPE_BAMVC-BAM variant calling only
#   --PIPE_TONLY_TRIM-Trim and Align
#   --PIPE_TONLY_TRIM-Trim and Align

# Python standard library
import argparse, os, time, sys, subprocess, re, json
import shlex


def parse_args(argv=None):
    """Parse the command-line options of the driver script.

    Args:
        argv: Optional list of argument strings. When None (the default)
            argparse falls back to ``sys.argv[1:]``, which is what the
            original zero-argument call did.

    Returns:
        argparse.Namespace holding one attribute per option.
    """
    parser = argparse.ArgumentParser(description="Input files")
    parser.add_argument("--fastq", help="FQ Inputs")
    parser.add_argument(
        "--file_input",
        help="TSV file of all fastq files used for input with 3 Columns Sample Name, Pair1, Pair2",
    )
    parser.add_argument("--bam", help="Glob of all the BAM files []")
    parser.add_argument("--sample_sheet", help="Sample sheet and required for Paired")
    # type=int rejects non-numeric values at parse time; the value is only
    # ever used through str(), so callers see the same text as before.
    parser.add_argument(
        "--splitregions", default=24, type=int, help="How many splits per region"
    )
    parser.add_argument("--vc", help="Add Somatic VC calling", action="store_true")
    parser.add_argument("--cnv", help="Add CNV calling", action="store_true")
    parser.add_argument(
        "--sv", help="Add Structural Variant calling", action="store_true"
    )
    parser.add_argument("--germline", help="Add Germline VC", action="store_true")
    parser.add_argument(
        "--qc",
        help="Add QC Steps (Requires Germline Calling as well)",
        action="store_true",
    )
    parser.add_argument("--output", help="Output Directory")
    parser.add_argument("--genome", help="hg38, mm10")
    parser.add_argument("--profile", help="Biowulf or Local Run")
    # Resuming stays the default (the original default was the truthy string
    # "True"); --no-resume is the new, explicit way to turn it off.
    parser.add_argument(
        "--resume", action="store_true", default=True, help="Resume previous run?"
    )
    parser.add_argument(
        "--no-resume",
        dest="resume",
        action="store_false",
        help="Start a fresh run instead of resuming",
    )
    parser.add_argument("--submit", action="store_true", help="Submit to SLURM?")
    parser.add_argument("--stub", action="store_true", help="Stub run")
    return parser.parse_args(argv)


def _file_input_has_bam(path):
    """Return True when the text of the TSV at ``path`` contains ``.bam``."""
    with open(path, "r") as handle:
        # The dot is escaped so that only a literal ".bam" counts.
        return re.search(r"\.bam", handle.read()) is not None


def _select_input(args, order):
    """Return the Nextflow input option for the first input given in ``order``.

    Exits with an error message when none of the three inputs was supplied.
    """
    candidates = {
        "fastq": ("--fastq_input", args.fastq),
        "file_input": ("--file_input", args.file_input),
        "bam": ("--bam_input", args.bam),
    }
    for key in order:
        flag, value = candidates[key]
        if value:
            return flag + " " + shlex.quote(value)
    sys.exit("error: one of --fastq, --file_input or --bam is required")


def _pipeline_modes(args):
    """Derive the input option and the per-step ``--PIPE_*`` flags.

    Returns:
        Tuple ``(in1, sample_path, baminput, modes)`` where ``in1`` is the
        input option, ``sample_path`` the sample-sheet option (empty in
        tumor-only mode), ``baminput`` tells whether alignment is skipped and
        ``modes`` maps a step name to its ``--PIPE_*`` flag.
    """
    if args.sample_sheet:
        # Paired mode: a sample sheet was supplied.
        sample_path = "--sample_sheet " + shlex.quote(args.sample_sheet)
        in1 = _select_input(args, ("fastq", "file_input", "bam"))
        # Alignment is skipped only when BAMs are the sole input.
        baminput = bool(args.bam) and not args.fastq and not args.file_input
        bam = "BAM" if args.bam else ""
        modes = {
            "align": "--PIPE_ALIGN",
            "vc": "--PIPE_" + bam + "VC",
            "sv": "--PIPE_" + bam + "SV",
            "cnv": "--PIPE_" + bam + "CNV",
            "germline": "--PIPE_" + bam + "GERMLINE",
            "qc": "--PIPE_QC_GL" if args.germline else "--PIPE_QC_NOGL",
        }
        return in1, sample_path, baminput, modes

    # Tumor-only mode (no sample sheet).
    if args.germline:
        # This script defines no tumor-only germline flag.
        sys.exit("error: --germline is only supported together with --sample_sheet")
    file_has_bam = bool(args.file_input) and _file_input_has_bam(args.file_input)
    baminput = file_has_bam or bool(args.bam)
    in1 = _select_input(args, ("fastq", "bam", "file_input"))
    # FASTQ input wins; otherwise BAMs come from --bam or from the TSV.
    bam = "BAM" if (not args.fastq and baminput) else ""
    modes = {
        "align": "--PIPE_TONLY_ALIGN",
        "vc": "--PIPE_TONLY_" + bam + "VC",
        "sv": "--PIPE_TONLY_" + bam + "SV",
        "cnv": "--PIPE_TONLY_" + bam + "CNV",
        "qc": "--PIPE_TONLY_QC",
    }
    return in1, "", baminput, modes


def _profile_settings(args):
    """Return ``(profile_option, split_regions)`` for the Nextflow call."""
    if args.stub and args.profile is None:
        return "-profile localstub", "4"
    # No profile means biowulf; any other name is passed through unchanged.
    name = "biowulf" if args.profile is None else args.profile
    return "-profile " + shlex.quote(name), str(args.splitregions)


def main(argv=None):
    """Build one ``nextflow run`` command per requested step and run or stage them.

    With ``--stub`` the commands are printed and executed directly through
    ``os.system``. Otherwise they are written to ``<output>_<timestamp>.slurm``,
    the matching ``sbatch`` line is written to ``kickoff_<timestamp>.sh`` and
    printed, and the job is submitted only when ``--submit`` is given.

    Args:
        argv: Optional argument list forwarded to ``parse_args``; None keeps
            the original behaviour of reading ``sys.argv``.
    """
    args = parse_args(argv)
    if not args.genome or not args.output:
        sys.exit("error: --genome and --output are required")

    dirname = os.path.dirname(os.path.realpath(__file__))
    header = ["#!/usr/bin/bash", "module load nextflow", "module load singularity"]

    in1, sample_path, baminput, modes = _pipeline_modes(args)
    profile, splitreg = _profile_settings(args)
    resume = "-resume" if args.resume else ""

    # Parts shared by every step; empty parts are dropped before joining.
    base = [
        "nextflow run",
        shlex.quote(dirname + "/main.nf"),
        "-c " + shlex.quote(dirname + "/nextflow.config"),
        in1,
        profile,
        resume,
        sample_path,
        "--genome",
        shlex.quote(args.genome),
        "--output",
        shlex.quote(args.output),
        "--split_regions",
        splitreg,
    ]
    base = [part for part in base if part]

    # Alignment runs unless the input is already BAM; the rest is opt-in.
    requested = [
        ("align", not baminput),
        ("vc", args.vc),
        ("sv", args.sv),
        ("cnv", args.cnv),
        ("germline", args.germline),
        ("qc", args.qc),
    ]
    commands = [" ".join(base + [modes[step]]) for step, wanted in requested if wanted]

    time1 = time.strftime("%Y_%m_%d_%H%M")
    if args.stub:
        stubbase = " -stub -without-podman T -without-conda -without-docker"
        cmd_stub = "\n".join(command + stubbase for command in commands)
        print(cmd_stub)
        os.system(cmd_stub)
        return

    outswarmmut = args.output + "_" + time1 + ".slurm"
    # "w" rather than "a": the timestamp has minute resolution, so appending
    # would duplicate the script when the driver is re-run within a minute.
    with open(outswarmmut, "w") as outfile:
        outfile.write("\n".join(header + commands) + "\n")
    sbatch_mut = (
        "sbatch --cpus-per-task=2 --mem=8g --time 10-00:00:00 --partition norm --output submit_"
        + time1
        + ".log --error error_"
        + time1
        + ".log --mail-type=BEGIN,END "
        + shlex.quote(outswarmmut)
    )
    sbatch_out = "kickoff_" + time1 + ".sh"
    with open(sbatch_out, "w") as outfile:
        outfile.write(sbatch_mut + "\n")
    print(sbatch_mut)
    if args.submit:
        os.system(sbatch_mut)


if __name__ == "__main__":
    main()

# Start of the next patch's mail header, kept from the original text:
# From 6e91623a7735b93ff56627096c0aa99ec97ae427 Mon Sep 17 00:00:00 2001 From: dnousome Date: Mon, 11 Dec 2023
14:56:14 -0500 Subject: [PATCH 03/58] refcator: moved genomes to conf folder --- conf/genomes.config | 71 +++++++++ nextflow.config | 360 ++++++-------------------------------------- 2 files changed, 117 insertions(+), 314 deletions(-) diff --git a/conf/genomes.config b/conf/genomes.config index e69de29..a7810a5 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -0,0 +1,71 @@ +params { + genomes { + 'hg38' { + genome = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta" + genomefai = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta.fai" + bwagenome= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.fasta" + genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict" + wgsregion = "/data/nousomedr/annotation/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" + intervals="${projectDir}/workflow/resources/hg38_v0_wgs_calling_regions.hg38.bed" + //millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" + //shapeitindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) // + KNOWNINDELS= '/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_indel.vcf' + KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites 
/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz' + dbsnp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz" + dbsnp_indel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_indel.vcf" + gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' // /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz + pon = "/data/nousomedr/wgs/updatedpon.vcf.gz" //pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon} + kgp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/1000G_phase1.snps.high_confidence.hg38.vcf.gz" + KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2" + snpeff_genome = "GRCh38.86" + snpeff_config = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/snpEff/4.3t/snpEff.config" + snpeff_bundle = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/snpEff/4.3t/" + sites_vcf= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/sites.hg38.vcf.gz" + somalier_ancestrydb="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/1kg-somalier" + vepcache = "/fdb/VEP/102/cache" + vepspecies = "homo_sapiens" + vepbuild = "GRCh38" + octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest" + octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" + SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" + chromosomes = 
['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM'] + } + + 'mm10' { + genome = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa" // file(params.genome) + genomefai = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa.fai" // file(params.genome) + bwagenome= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwaindex/genome.fa" + genomedict= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.dict" + intervals="/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/mm10_wgsregions.bed" + KNOWNINDELS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz" + KNOWNRECAL = "-known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz -known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_snps.vcf.gz" + dbsnp = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_allstrains_dbSNP142.vcf.gz" + pon = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_dbSNP_allStrains_compSet_noIND.vcf.gz" + kgp = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_knownSNPs_sites.vcf.gz" + KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2" + gnomad= "--germline-resource /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_allstrains_dbSNP142.vcf.gz" + snpeff_genome = "GRCm38.86" + snpeff_config = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/snpEff/4.3t/snpEff.config" + snpeff_bundle = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/snpEff/4.3t/" + sites_vcf = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/somalier/mm10.sites.vcf.gz" + //EDIT SOMALIER ANCESTRY AFTER! 
+ somalier_ancestrydb="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/1kg-somalier" + vepcache = "/fdb/VEP/102/cache" + vepspecies = "mus_musculus" + vepbuild= "GRCm38" + octopus_sforest = "" + octopus_gforest = "" + SEQUENZAGC = '/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/SEQUENZA/mm10.gc50Base.wig.gz' + FREEC { + FREECLENGTHS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/mm10.fa.fai" + FREECCHROMS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/Chromosomes" + FREECPILEUP = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/mm10_dbSNP137.ucsc.freec.bed" + FREECSNPS= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/mm10_dbSNP137.ucsc.freec.txt.gz" + } + chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chrX','chrY','chrM'] + } + } +} + + + diff --git a/nextflow.config b/nextflow.config index 987ca4b..1581cff 100644 --- a/nextflow.config +++ b/nextflow.config @@ -3,60 +3,57 @@ manifest { name = "CCBR/LOGAN" author = "CCR Collaborative Bioinformatics Resource" homePage = "https://github.com/CCBR/LOGAN" - description = "one-line description of LOGAN goes here" + description = "whoLe genOme-sequencinG Analysis pipeliNe" mainScript = "main.nf" } + -includeConfig 'conf/hg38.config' -includeConfig 'conf/mm10.config' +includeConfig 'conf/genomes.config' +includeConfig 'conf/base.config' +includeConfig 'conf/modules.config' +includeConfig 'conf/containers.config' + + +params { -params { // TODO create a separate genome config, with genome index dir that can change depending on platform. 
see https://github.com/CCBR/CHAMPAGNE/blob/main/conf/genomes.config - genome = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta" // file(params.genome) - genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict" - wgsregion = "/data/nousomedr/annotation/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" // - millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" - shapeitindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) // - dbsnp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz" - dbsnp_indel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_indel.vcf" - gnomad = '/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' // /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz - //pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon} - pon = "/data/nousomedr/wgs/updatedpon.vcf.gz" - kgp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/1000G_phase1.snps.high_confidence.hg38.vcf.gz" - KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2" fastq_screen_conf= "${projectDir}/workflow/resources/fastq_screen.conf" get_flowcell_lanes="${projectDir}/workflow/scripts/flowcell_lane.py" - intervals="${projectDir}/workflow/resources/hg38_v0_wgs_calling_regions.hg38.bed" 
splitbed="${projectDir}/workflow/resources/split_Bed_into_equal_regions.py" - split_regions = "24" //Number of regions to split by - snpeff_genome = "GRCh38.86" - snpeff_config = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/snpEff/4.3t/snpEff.config" - snpeff_bundle = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/snpEff/4.3t/" - sites_vcf= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/sites.hg38.vcf.gz" - somalier_ancestrydb="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/1kg-somalier" + split_regions = "36" //Number of regions to split by script_genderPrediction = "${projectDir}/workflow/scripts/RScripts/predictGender.R" script_combineSamples = "${projectDir}/workflow/scripts/RScripts/combineAllSampleCompareResults.R" script_ancestry = "${projectDir}/workflow/scripts/RScripts/sampleCompareAncestoryPlots.R" - bwagenome= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.fasta" + script_sequenza = "${projectDir}/workflow/scripts/RScripts/run_sequenza.R" + script_freec = "${projectDir}/workflow/scripts/make_freec_genome.pl" + script_freecpaired = "${projectDir}/workflow/scripts/freec_paired.pl" + freec_significance = "${projectDir}/workflow/scripts/assess_significance.R" + freec_plot = "${projectDir}/workflow/scripts/makeGraph.R" + lofreq_convert = "${projectDir}/workflow/scripts/add_gt_lofreq.sh" vep_cache = "/fdb/VEP/102/cache" - //Biowulf - config_profile_description = 'Biowulf nf-core config' - config_profile_contact = 'staff@hpc.nih.gov' - max_memory = 224.GB - max_cpus = 32 - output = "output" - //SUB WORKFLOWS to SPLIT PIPE_ALIGN=null - PIPE_GERMLINE=null + PIPE_GL=null PIPE_VC=null PIPE_SV=null + PIPE_CNV=null PIPE_QC=null + PIPE_QC_NOGL=null + PIPE_QC_GL=null PIPE_BAMVC=null + PIPE_BAMCNV=null + PIPE_BAMSV=null + PIPE_TONLY_ALIGN=null PIPE_TONLY_VC=null + PIPE_TONLY_SV=null + PIPE_TONLY_CNV=null + PIPE_BAMVC_TONLY=null PIPE_TONLY_BAMVC=null + 
PIPE_TONLY_BAMSV=null + PIPE_TONLY_BAMCNV=null + PIPE_TONLY_QC=null //Set all Inputs to null @@ -66,9 +63,9 @@ params { // TODO create a separate genome config, with genome index dir that can file_input=null } -includeConfig 'conf/base.config' profiles { + debug { process.beforeScript = 'echo $HOSTNAME' } docker { docker.enabled = true @@ -78,6 +75,13 @@ profiles { // once this is established and works well, nextflow might implement this behavior as new default. docker.runOptions = '-u \$(id -u):\$(id -g)' } + singularity { + enabled = true + autoMounts = true + cacheDir = "$PWD/singularity" + envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID' + runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/nousomedr/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' + } biowulf { includeConfig 'conf/biowulf.config' } @@ -93,286 +97,24 @@ profiles { ci_stub { includeConfig 'conf/ci_stub.config' } - - local { // TODO move all containers to conf/containers.config - process { - executor = 'local' - withName:fc_lane { - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - } - withName:fastq_screen { - container= 'docker://nciccbr/ccbr_fastq_screen_0.13.0:v2.0' - } - withName:kraken { - container= 'docker://nciccbr/ccbr_kraken_v2.1.1:v0.0.1' - } - withName:fastqc { - container= 'docker://nciccbr/ccbr_fastqc_0.11.9:v1.1' - } - withName: qualimap_bamqc { - container= 'docker://nciccbr/ccbr_qualimap:v0.0.1' - } - withName: 'samtools_flagstats|vcftools|bcftools_stats|gatk_varianteval|snpeff|somalier_extract|somalier_analysis' { - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - } - withName: 'multiqc' { - container= 'docker://nciccbr/ccbr_multiqc_1.9:v0.0.1' - } - withName: 'collectvariantcallmetrics' { - container= 'docker://nciccbr/ccbr_picard:v0.0.1' - } - withName: 'fastp|bwamem2|indelrealign|bqsr|gatherbqsr|samtoolsindex|applybqsr' { - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' 
- } - withName:'mutect2|mutect2_t|mutect2_t_tonly|mutect2filter|mutect2filter_tonly|learnreadorientationmodel|learnreadorientationmodel_tonly|contamination_paired|contamination_tumoronly|pileup_paired_t|pileup_paired_n|pileup_paired_tonly' { - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - } - withName: 'strelka_tn|vardict_tn|vardict_tonly|varscan_tn|varscan_tonly|combineVariants|combineVariants_strelka' { - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - } - withName: 'annotvep_tn|annotvep_tonly' { - container= 'docker://dnousome/ccbr_vcf2maf:v102.0.0' - } - withName: 'svaba_somatic' { - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - } - - } - singularity { - enabled = true - autoMounts = true - cacheDir = "$PWD/singularity" - envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID' - // TODO refactor to no longer need bind mounts. These paths also only work on biowulf - runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/nousomedr/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' - - } - } - - localstub { - process { - executor = 'local' - - singularity { - enabled = false - } - } - } - - biowulf_DEPRECATED { // TODO switch all cpus/time/memory definitions to use labels in conf/base.config - process { - executor = 'slurm' - queue = 'norm' - queueSize = 200 - errorStrategy = 'finish' - maxRetries = 0 - pollInterval = '2 min' - queueStatInterval = '5 min' - submitRateLimit = '6/1min' - - //Default options for Slurm Nodes - cpus= '4' - time= 48.h - memory=64.GB - - timeline.enabled = true - report.enabled = true - //scratch = '/lscratch/$SLURM_JOBID' - - - //Each Process and Container if it exists or use module in each process - withName:fastq_screen{ - container= 'docker://nciccbr/ccbr_fastq_screen_0.13.0:v2.0' - memory=32.GB - time=48.h - cpus=4 - } - withName:fastqc { - container= 'docker://nciccbr/ccbr_fastqc_0.11.9:v1.1' - memory=24.GB - time=24.h - cpus=8 - } 
- withName:fastp { - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - memory=24.GB - time=24.h - cpus=4 - } - withName:fc_lane { - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - } - withName:bwamem2 { - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - memory=200.GB - time=48.h - cpus=17 - } - withName:indelrealign{ - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - memory=48.GB - time=72.h - cpus=16 - } - withName:bqsr{ - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - memory= 32.GB - time= 48.h - } - withName:gatherbqsr{ - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - memory= 16.GB - time= 2.h - } - withName:applybqsr{ - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - memory= 48.GB - time= 48.h - } - withName:samtoolsindex{ - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - memory= 16.GB - time= 12.h - cpus= 4 - } - withName: 'mutect2|mutect2_t|mutect2_t_tonly' { - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - memory= 48.GB - cpus= 4 - time= 72.h - } - withName: 'vardict_tn|vardict_tonly|varscan_tn|varscan_tonly|combineVariants|combineVariants_strelka' { - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - memory= 32.GB - cpus= 2 - time= 72.h - } - withName: 'strelka_tn' { - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - memory= 48.GB - cpus= 16 - time= 72.h - } - withName:'mutect2filter|mutect2filter_tonly' { - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - memory= 24.GB - time= 24.h - cpus= 4 - } - withName:'contamination_paired|contamination_tumoronly'{ - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - memory= 24.GB - time= 24.h - } - withName:'learnreadorientationmodel|learnreadorientationmodel_tonly' { - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - memory= 48.GB - time= 24.h - } - withName:'mergemut2stats|mergemut2stats_tonly' { - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - memory= 16.GB - time= 24.h - } - 
withName:'pileup_paired_t|pileup_paired_n|pileup_paired_tonly' { - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - memory= 16.GB - time= 12.h - } - withName:'annotvep_tn|annotvep_tonly'{ - container= 'docker://dnousome/ccbr_vcf2maf:v102.0.0' - memory= 32.GB - time= 24.h - cpus=16 - } - withName:kraken { - container= 'docker://nciccbr/ccbr_kraken_v2.1.1:v0.0.1' - memory= 64.GB - time= 24.h - cpus=16 - clusterOptions="--gres=lscratch:256" - } - withName:'deepvariant_step1' { - memory= 64.GB - time= 24.h - cpus=2 - } - withName:'deepvariant_step3' { - memory= 64.GB - time= 24.h - cpus=2 - clusterOptions="--gres=lscratch:256" - } - withName:'deepvariant_step2|deepvariant_combined' { - memory= 70.GB - time= 24.h - cpus= 17 - queue = 'gpu' - clusterOptions="--partition=gpu --gres=gpu:v100x:1,lscratch:256" - } - withName:'somalier_extract|somalier_analysis' { - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - memory= 16.GB - time= 12.h - } - withName:'gatk_varianteval' { - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - memory= 16.GB - time= 12.h - } - withName:'qualimap_bamqc' { - container= 'docker://nciccbr/ccbr_qualimap:v0.0.1' - cpus= 8 - memory= 120.GB - time= 48.h - //errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : task.exitStatus in [143, 255] ? 'ignore' : 'finish' } - } - withName:'cobalt|amber|purple' { - memory= 64.GB - time= 12.h - cpus=16 - } - withName:'svaba_somatic' { - container= 'docker://dnousome/ccbr_logan_base:v0.3.0' - memory= 64.GB - time= 24.h - cpus=16 - } - - } - } } - -includeConfig 'conf/genomes.config' -includeConfig 'conf/containers.config' - + // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. 
Once we have a common agreement on where to keep Julia packages, this is adjustable. -env { - PYTHONNOUSERSITE = 1 - R_PROFILE_USER = "/.Rprofile" - R_ENVIRON_USER = "/.Renviron" - JULIA_DEPOT_PATH = "/usr/local/share/julia" -} + env { + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" + } // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] - //Container options - singularity { - enabled = true - autoMounts = true - cacheDir = "$PWD/singularity" - envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID' - runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/nousomedr/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' - } + - } -} -includeConfig 'conf/genomes.config' -includeConfig 'conf/containers.config' // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. 
@@ -384,6 +126,7 @@ env { JULIA_DEPOT_PATH = "/usr/local/share/julia" } + // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] @@ -399,17 +142,6 @@ report { file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } -includeConfig 'conf/modules.config' - - -manifest { - name = "CCBR/LOGAN" - author = "CCR Collaborative Bioinformatics Resource" - homePage = "https://github.com/CCBR/LOGAN" - description = "whoLe genOme-sequencinG Analysis pipeliNe" - mainScript = "main.nf" -} - // Function to ensure that resource requirements don't go beyond // a maximum limit def check_max(obj, type) { From 6508da62a6a28217ea978ec02708ebcafe949025 Mon Sep 17 00:00:00 2001 From: dnousome Date: Mon, 11 Dec 2023 14:56:28 -0500 Subject: [PATCH 04/58] fix: renamed genome variable --- modules/local/germline.nf | 27 +-- modules/local/qc.nf | 173 +++++++++++++------ modules/local/splitbed.nf | 9 + modules/local/trim_align.nf | 144 ++++++++-------- modules/local/variant_calling_tonly.nf | 229 ++++++++++++++++++------- 5 files changed, 383 insertions(+), 199 deletions(-) diff --git a/modules/local/germline.nf b/modules/local/germline.nf index b004b80..d3544a5 100644 --- a/modules/local/germline.nf +++ b/modules/local/germline.nf @@ -1,10 +1,5 @@ -//References -GENOME=file(params.genome) +GENOMEREF=file(params.genomes[params.genome].genome) MODEL="/opt/models/wgs/model.ckpt" -intervalbedin = file(params.intervals) - - - //Output Directory outdir=file(params.output) @@ -14,8 +9,6 @@ outdir=file(params.output) process deepvariant_step1 { module=['deepvariant/1.4.0'] - //publishDir("${outdir}/deepvariant", mode: 'copy') - input: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path(bed) @@ -29,7 +22,7 @@ process deepvariant_step1 { mkdir -p gvcf make_examples \ --mode calling \ - --ref $GENOME \ + --ref $GENOMEREF \ --regions ${bed} \ --reads ${samplename}.bam \ --channels insert_size \ @@ 
-52,7 +45,6 @@ process deepvariant_step2 { module=['deepvariant/1.4.0'] - //publishDir("${outdir}/deepvariant", mode: 'copy') input: tuple val(samplename), path(tfrecords), path(tfgvcf) @@ -80,7 +72,6 @@ process deepvariant_step2 { //Step 3 DV process deepvariant_step3 { - scratch '/lscratch/$SLURM_JOB_ID/dv' publishDir("${outdir}/deepvariant", mode: 'copy') module=['deepvariant/1.4.0'] @@ -97,7 +88,7 @@ process deepvariant_step3 { script: """ postprocess_variants \ - --ref $GENOME \ + --ref $GENOMEREF \ --infile ${samplename}_call_variants_output.tfrecord.gz \ --outfile ${samplename}.vcf.gz \ --gvcf_outfile ${samplename}.gvcf.gz \ @@ -107,7 +98,7 @@ process deepvariant_step3 { stub: """ touch ${samplename}.vcf.gz ${samplename}.vcf.gz.tbi - touch ${samplename}.gvcf.gz ${samplename}.gvcf.gz.tbi + touch ${samplename}.gvcf.gz ${samplename}.gvcf.gz.tbi """ @@ -116,7 +107,6 @@ process deepvariant_step3 { //Combined DeepVariant process deepvariant_combined { module=['deepvariant/1.4.0'] - scratch '/lscratch/$SLURM_JOB_ID/dv' publishDir("${outdir}/deepvariant", mode: 'copy') @@ -132,7 +122,7 @@ process deepvariant_combined { """ run_deepvariant \ --model_type=WGS \ - --ref=$GENOME \ + --ref=$GENOMEREF \ --reads=${samplename}.bam \ --output_gvcf= ${samplename}.gvcf.gz \ --output_vcf=${samplename}.vcf.gz \ @@ -151,11 +141,10 @@ process deepvariant_combined { } process glnexus { - //scratch '/lscratch/$SLURM_JOB_ID/dv' - publishDir("${outdir}/deepvariant", mode: 'copy') module=['glnexus','bcftools'] - + + publishDir("${outdir}/deepvariant", mode: 'copy') input: path(gvcfs) @@ -173,7 +162,7 @@ process glnexus { -m - \ -Oz \ --threads 8 \ - -f $GENOME \ + -f $GENOMEREF \ -o germline.norm.vcf.gz \ germline.v.bcf diff --git a/modules/local/qc.nf b/modules/local/qc.nf index 47c98f5..65515e8 100644 --- a/modules/local/qc.nf +++ b/modules/local/qc.nf @@ -1,22 +1,15 @@ ///References to assign -GENOME=file(params.genome) -GENOMEDICT=file(params.genomedict) 
-WGSREGION=file(params.wgsregion) -MILLSINDEL=file(params.millsindel) //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz -SHAPEITINDEL=file(params.shapeitindel) //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz -KGP=file(params.kgp) //1000G_phase1.snps.high_confidence.hg38.vcf.gz" -DBSNP=file(params.dbsnp) //dbsnp_138.hg38.vcf.gz" -GNOMAD=file(params.gnomad) //somatic-hg38-af-only-gnomad.hg38.vcf.gz -PON=file(params.pon) +GENOMEREF=file(params.genomes[params.genome].genome) +DBSNP=file(params.genomes[params.genome].dbsnp) //dbsnp_138.hg38.vcf.gz" FASTQ_SCREEN_CONF=file(params.fastq_screen_conf) -BACDB=file(params.KRAKENBACDB) -SNPEFF_GENOME = params.snpeff_genome -SNPEFF_CONFIG = file(params.snpeff_config) -SNPEFF_BUNDLE = file(params.snpeff_bundle) +BACDB=file(params.genomes[params.genome].KRAKENBACDB) +SNPEFF_GENOME = params.genomes[params.genome].snpeff_genome +SNPEFF_CONFIG = file(params.genomes[params.genome].snpeff_config) +SNPEFF_BUNDLE = file(params.genomes[params.genome].snpeff_bundle) //SOMALIER -SITES_VCF= file(params.sites_vcf) -ANCESTRY_DB=file(params.somalier_ancestrydb) +SITES_VCF= file(params.genomes[params.genome].sites_vcf) +ANCESTRY_DB=file(params.genomes[params.genome].somalier_ancestrydb) SCRIPT_PATH_GENDER = file(params.script_genderPrediction) SCRIPT_PATH_SAMPLES = file(params.script_combineSamples) SCRIPT_PATH_PCA = file(params.script_ancestry) @@ -26,7 +19,7 @@ SCRIPT_PATH_PCA = file(params.script_ancestry) outdir=file(params.output) process fc_lane { - + label 'process_low' publishDir("${outdir}/QC/fc_lane/", mode:'copy') input: @@ -57,7 +50,6 @@ process fastq_screen { publishDir(path: "${outdir}/QC/fastq_screen/", mode:'copy') - //module=['fastq_screen/0.15.2','bowtie/2-2.5.1'] input: tuple val(samplename), path("${samplename}.R1.trimmed.fastq.gz"), @@ -107,11 +99,6 @@ process kraken { Kraken logfile and interative krona report */ publishDir(path: "${outdir}/QC/kraken/", mode: 'copy') - - 
//module=['kraken/2.1.2', 'kronatools/2.8'] - scratch '/lscratch/$SLURM_JOB_ID' - //scratch '/data/CCBR/rawdata/nousome/small_truth_set' //CHANGE AFTER to LSCRATCH - input: tuple val(samplename), @@ -208,7 +195,7 @@ process qualimap_bamqc { //module: config['images']['qualimap'] input: - tuple val(samplename), path("${samplename}.bqsr.bam"), path("${samplename}.bqsr.bai") + tuple val(samplename), path(bam), path(bai) output: tuple path("${samplename}_genome_results.txt"), path("${samplename}_qualimapReport.html") @@ -216,7 +203,7 @@ process qualimap_bamqc { script: """ unset DISPLAY - qualimap bamqc -bam ${samplename}.bqsr.bam \ + qualimap bamqc -bam ${bam} \ --java-mem-size=112G \ -c -ip \ -outdir ${samplename} \ @@ -247,18 +234,19 @@ process samtools_flagstats { @Output: Text file containing alignment statistics */ - publishDir("${outdir}/QC/flagstats/", mode: "copy") - //module=['samtools/1.16.1'] + label 'process_mid' + publishDir("${outdir}/QC/flagstats/", mode: "copy") + input: - tuple val(samplename), path("${samplename}.bqsr.bam"), path("${samplename}.bqsr.bai") + tuple val(samplename), path(bam), path(bai) output: path("${samplename}.samtools_flagstat.txt") script: """ - samtools flagstat ${samplename}.bqsr.bam > ${samplename}.samtools_flagstat.txt + samtools flagstat ${bam} > ${samplename}.samtools_flagstat.txt """ stub: @@ -267,6 +255,48 @@ process samtools_flagstats { """ } + +process mosdepth { + /* + Quality-control step to assess depth + @Input: + Recalibrated BAM file (scatter) + @Output: + `{prefix}.mosdepth.global.dist.txt` + `{prefix}.mosdepth.summary.txt` + `{prefix}.mosdepth.region.dist.txt` (if --by is specified) + `{prefix}.per-base.bed.gz|per-base.d4` (unless -n/--no-per-base is specified) + `{prefix}.regions.bed.gz` (if --by is specified) + `{prefix}.quantized.bed.gz` (if --quantize is specified) + `{prefix}.thresholds.bed.gz` (if --thresholds is specified) + */ + + publishDir("${outdir}/QC/mosdepth/", mode: "copy") + + input: + tuple 
val(samplename), path(bam), path(bai) + + output: + path("${samplename}.mosdepth.region.dist.txt"), + path("${samplename}.mosdepth.summary.txt"), + path("${samplename}.regions.bed.gz"), + path("${samplename}.regions.bed.gz.csi") + + + script: + """ + mosdepth -n --fast-mode --by 500 ${samplename} ${bam} -t $task.cpus + """ + + stub: + """ + touch "${samplename}.mosdepth.region.dist.txt" + touch "${samplename}.mosdepth.summary.txt" + touch "${samplename}.regions.bed.gz" + touch "${samplename}.regions.bed.gz.csi" + """ +} + process vcftools { /* Quality-control step to calculates a measure of heterozygosity on @@ -279,8 +309,9 @@ process vcftools { @Output: Text file containing a measure of heterozygosity */ + label 'process_mid' + publishDir(path:"${outdir}/QC/vcftools", mode: 'copy') - //module=['vcftools/0.1.16'] input: tuple path(germlinevcf),path(germlinetbi) @@ -311,9 +342,7 @@ process collectvariantcallmetrics { Text file containing a collection of metrics relating to snps and indels */ publishDir("${outdir}/QC/variantmetrics", mode: 'copy') - //module=['picard/2.20.8'] - //container: config['images']['picard'] - + input: tuple path(germlinevcf),path(germlinetbi) @@ -321,9 +350,6 @@ process collectvariantcallmetrics { tuple path("raw_variants.variant_calling_detail_metrics"), path("raw_variants.variant_calling_summary_metrics") - //params: - // dbsnp=config['references']['DBSNP'], - // prefix = os.path.join(output_qcdir,"raw_variants"), script: """ @@ -356,6 +382,7 @@ process bcftools_stats { Text file containing a collection of summary statistics */ + label 'process_mid' publishDir("${outdir}/QC/bcftoolsstat", mode: 'copy') input: @@ -388,8 +415,9 @@ process gatk_varianteval { @Output: Evaluation table containing a collection of summary statistics */ + label 'process_mid' + publishDir("${outdir}/QC/gatk_varianteval", mode: 'copy') - //module=['GATK/4.2.0.0'] input: tuple val(samplename), path("${samplename}.gvcf.gz") ,path("${samplename}.gvcf.gz.tbi") @@ 
-406,7 +434,7 @@ process gatk_varianteval { script: """ gatk --java-options '-Xmx12g -XX:ParallelGCThreads=16' VariantEval \ - -R $GENOME \ + -R $GENOMEREF \ -O ${samplename}.germline.eval.grp \ --dbsnp $DBSNP \ --eval ${samplename}.gvcf.gz @@ -431,12 +459,7 @@ process snpeff { @Output: Evaluation table containing a collection of summary statistics */ - - //genome = config['references']['SNPEFF_GENOME'], - //config = config['references']['SNPEFF_CONFIG'], - //bundle = config['references']['SNPEFF_BUNDLE'], - //envmodules: 'snpEff/4.3t' - //container: config['images']['wes_base'] + label 'process_mid' publishDir("${outdir}/QC/snpeff", mode: 'copy') input: @@ -473,6 +496,7 @@ process somalier_extract { @Output: Exracted sites in (binary) somalier format */ + label 'process_low' publishDir("${outdir}/QC/somalier", mode: 'copy') input: @@ -490,7 +514,7 @@ process somalier_extract { somalier extract \ -d output \ --sites $SITES_VCF \ - -f $GENOME \ + -f $GENOMEREF \ ${samplename}.bam """ @@ -501,7 +525,7 @@ process somalier_extract { """ } -process somalier_analysis { +process somalier_analysis_human { /* To estimate relatedness, Somalier uses extracted site information to compare across all samples. 
This step also runs the ancestry estimation @@ -511,13 +535,9 @@ process somalier_analysis { @Output: Separate tab-separated value (TSV) files with relatedness and ancestry outputs - ancestry_db = config['references']['SOMALIER']['ANCESTRY_DB'], - sites_vcf = config['references']['SOMALIER']['SITES_VCF'], - genomeFasta = config['references']['GENOME'], - script_path_gender = config['scripts']['genderPrediction'], - script_path_samples = config['scripts']['combineSamples'], - script_path_pca = config['scripts']['ancestry'], */ + label 'process_low' + publishDir("${outdir}/QC/somalier", mode: 'copy') input: @@ -570,6 +590,57 @@ process somalier_analysis { """ } +process somalier_analysis_mouse { + /* + To estimate relatedness, Somalier uses extracted site information to + compare across all samples. This step also runs the ancestry estimation + function in Somalier. + @Input: + Exracted sites in (binary) somalier format for ALL samples in the cohort + @Output: + Separate tab-separated value (TSV) files with relatedness and ancestry outputs + + */ + label 'process_low' + + publishDir("${outdir}/QC/somalier", mode: 'copy') + + input: + path(somalierin) + + output: + tuple path("relatedness.pairs.tsv"), + path("relatedness.samples.tsv"), + path("predicted.genders.tsv"), + path("predicted.pairs.tsv") + + script: + """ + echo "Estimating relatedness" + somalier relate \ + -o "relatedness" \ + $somalierin + + Rscript $SCRIPT_PATH_GENDER \ + relatedness.samples.tsv \ + predicted.genders.tsv + + Rscript $SCRIPT_PATH_SAMPLES \ + relatedness.pairs.tsv \ + predicted.pairs.tsv + + """ + + stub: + + """ + touch relatedness.pairs.tsv + touch relatedness.samples.tsv + touch predicted.genders.tsv + touch predicted.pairs.tsv + + """ +} process multiqc { diff --git a/modules/local/splitbed.nf b/modules/local/splitbed.nf index 09ffb9b..0ae2416 100644 --- a/modules/local/splitbed.nf +++ b/modules/local/splitbed.nf @@ -22,3 +22,12 @@ process splitinterval { python $SPLIT_BED -infile 
${BED_IN} -num ${SPLIT_REGIONS} -out 'bedout/bed' """ } + +/* +Code to convert beds to interval list +awk -F '\t' '{printf("%s\t0\t%s\n",$1,$2);}' genome.fa.fai +bedtools subtract -a GRCh38.primary_assembly.genome.bed -b ../hg38.blacklist.bed > GRCh38.primary_assembly.genome.interval.bed + +gatk BedToIntervalList -I GRCh38.primary_assembly.genome.interval.bed -O \ +GRCh38.primary_assembly.genome.interval_list -SD GRCh38.primary_assembly.genome.dict +*/ diff --git a/modules/local/trim_align.nf b/modules/local/trim_align.nf index 766595a..fefe243 100644 --- a/modules/local/trim_align.nf +++ b/modules/local/trim_align.nf @@ -1,17 +1,12 @@ -GENOME=file(params.genome) -GENOMEDICT=file(params.genomedict) -WGSREGION=file(params.wgsregion) -MILLSINDEL=file(params.millsindel) //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz -SHAPEITINDEL=file(params.shapeitindel) //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz -KGP=file(params.kgp) //1000G_phase1.snps.high_confidence.hg38.vcf.gz" -DBSNP=file(params.dbsnp) //dbsnp_138.hg38.vcf.gz" -GNOMAD=file(params.gnomad) //somatic-hg38-af-only-gnomad.hg38.vcf.gz -PON=file(params.pon) +GENOMEREF=file(params.genomes[params.genome].genome) +KNOWNRECAL = params.genomes[params.genome].KNOWNRECAL outdir=file(params.output) -process fastp{ +process fastp { + label 'process_mid' tag { name } + publishDir(path: "${outdir}/QC/fastp", mode: 'copy', pattern: '{*fastp.json,*fastp.html}') input: tuple val(samplename), path(fqs) @@ -25,7 +20,7 @@ process fastp{ script: """ - fastp -w 4 \ + fastp -w $task.cpus \ --detect_adapter_for_pe \ --in1 ${fqs[0]} \ --in2 ${fqs[1]} \ @@ -48,6 +43,7 @@ process fastp{ process bwamem2 { tag { name } + input: tuple val(samplename), path("${samplename}.R1.trimmed.fastq.gz"), @@ -59,18 +55,17 @@ process bwamem2 { tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") script: - //BWAmem2/samblaster/samtools sort for marking duplicates; """ bwa-mem2 mem -M \ -R 
'@RG\\tID:${samplename}\\tSM:${samplename}\\tPL:illumina\\tLB:${samplename}\\tPU:${samplename}\\tCN:hgsc\\tDS:wgs' \ - -t 16 \ - ${GENOME} \ + -t $task.cpus \ + ${GENOMEREF} \ ${samplename}.R1.trimmed.fastq.gz ${samplename}.R2.trimmed.fastq.gz | \ samblaster -M | \ - samtools sort -@12 -m 4G - -o ${samplename}.bam + samtools sort -@ $task.cpus -m 4G - -o ${samplename}.bam - samtools index -@ 8 ${samplename}.bam ${samplename}.bai + samtools index -@ $task.cpus ${samplename}.bam ${samplename}.bai """ @@ -80,66 +75,26 @@ process bwamem2 { """ } -process indelrealign { - /* - Briefly, RealignerTargetCreator runs faster with increasing -nt threads, - while IndelRealigner shows diminishing returns for increasing scatter - */ - tag { name } - - input: - tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") - - output: - tuple val(samplename), path("${samplename}.ir.bam") - - script: - - """ - /usr/bin/java -Xmx32g -jar \${GATK_JAR} -T RealignerTargetCreator \ - -I ${samplename}.bam \ - -R ${GENOME} \ - -o ${samplename}.intervals \ - -nt 16 \ - -known ${MILLSINDEL} -known ${SHAPEITINDEL} - - /usr/bin/java -Xmx32g -jar \${GATK_JAR} -T IndelRealigner \ - -R ${GENOME} \ - -I ${samplename}.bam \ - -known ${MILLSINDEL} -known ${SHAPEITINDEL} \ - --use_jdk_inflater \ - --use_jdk_deflater \ - -targetIntervals ${samplename}.intervals \ - -o ${samplename}.ir.bam - """ - - - stub: - """ - touch ${samplename}.ir.bam - """ - -} - process bqsr { /* Base quality recalibration for all samples */ + + label 'process_low' input: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path(bed) output: - tuple val(samplename),path("${samplename}_${bed.simpleName}.recal_data.grp"),emit: bqsrby - //path("${bam.simpleName}_${bed.simpleName}.recal_data.grp"), emit: bqsrby + tuple val(samplename),path("${samplename}_${bed.simpleName}.recal_data.grp"), emit: bqsrby script: """ - gatk --java-options '-Xmx32g' BaseRecalibrator \ + gatk --java-options 
'-Xmx16g' BaseRecalibrator \ --input ${samplename}.bam \ - --reference ${GENOME} \ - --known-sites ${MILLSINDEL} --known-sites ${SHAPEITINDEL} \ + --reference ${GENOMEREF} \ + ${KNOWNRECAL} \ --output ${samplename}_${bed.simpleName}.recal_data.grp \ --intervals ${bed} """ @@ -152,7 +107,7 @@ process bqsr { } process gatherbqsr { - + label 'process_low' input: tuple val(samplename), path(recalgroups) output: @@ -169,6 +124,7 @@ process gatherbqsr { """ stub: + """ touch ${samplename}.recal_data.grp """ @@ -179,6 +135,7 @@ process applybqsr { /* Base quality recalibration for all samples to */ + label 'process_low' publishDir(path: "${outdir}/bams/BQSR", mode: 'copy') input: @@ -188,19 +145,19 @@ process applybqsr { tuple val(samplename), path("${samplename}.bqsr.bam"), path("${samplename}.bqsr.bai") script: - """ + """ gatk --java-options '-Xmx32g' ApplyBQSR \ - --reference ${GENOME} \ + --reference ${GENOMEREF} \ --input ${samplename}.bam \ --bqsr-recal-file ${samplename}.recal_data.grp \ --output ${samplename}.bqsr.bam \ --use-jdk-inflater \ --use-jdk-deflater - """ stub: + """ touch ${samplename}.bqsr.bam ${samplename}.bqsr.bai """ @@ -210,6 +167,7 @@ process applybqsr { process samtoolsindex { + label 'process_mid' publishDir(path: "${outdir}/bams/BQSR", mode: 'copy') input: @@ -220,7 +178,7 @@ process samtoolsindex { script: """ - samtools index -@ 4 ${bam} ${bam}.bai + samtools index -@ $task.cpus ${bam} ${bam}.bai """ stub: @@ -230,8 +188,9 @@ process samtoolsindex { } -//Save to CRAM for output and publish -process bamtocram_tonly{ +//Save to CRAM for output +process bamtocram_tonly { + label 'process_mid' input: tuple val(tumorname), path(tumor), path(tumorbai) @@ -241,6 +200,49 @@ process bamtocram_tonly{ script: """ - samtools view -@ 4 -C -T $GENOME -o ${sample}.cram {$tumor}.bam + samtools view -@ $task.cpus -C -T $GENOMEREF -o ${sample}.cram {$tumor}.bam """ -} \ No newline at end of file +} + + +/* +process indelrealign { + //Briefly, 
RealignerTargetCreator runs faster with increasing -nt threads, + //while IndelRealigner shows diminishing returns for increasing scatter + + tag { name } + + input: + tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") + + output: + tuple val(samplename), path("${samplename}.ir.bam") + + script: + + """ + /usr/bin/java -Xmx32g -jar \${GATK_JAR} -T RealignerTargetCreator \ + -I ${samplename}.bam \ + -R ${GENOMEREF} \ + -o ${samplename}.intervals \ + -nt 16 \ + -known ${MILLSINDEL} -known ${SHAPEITINDEL} + + /usr/bin/java -Xmx32g -jar \${GATK_JAR} -T IndelRealigner \ + -R ${GENOMEREF} \ + -I ${samplename}.bam \ + -known ${MILLSINDEL} -known ${SHAPEITINDEL} \ + --use_jdk_inflater \ + --use_jdk_deflater \ + -targetIntervals ${samplename}.intervals \ + -o ${samplename}.ir.bam + """ + + + stub: + """ + touch ${samplename}.ir.bam + """ + +} +*/ diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf index 3b73da7..3d67e26 100644 --- a/modules/local/variant_calling_tonly.nf +++ b/modules/local/variant_calling_tonly.nf @@ -1,13 +1,15 @@ -GENOME=file(params.genome) -GENOMEDICT=file(params.genomedict) -WGSREGION=file(params.wgsregion) -MILLSINDEL=file(params.millsindel) //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz -SHAPEITINDEL=file(params.shapeitindel) //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz -KGP=file(params.kgp) //1000G_phase1.snps.high_confidence.hg38.vcf.gz" -DBSNP=file(params.dbsnp) //dbsnp_138.hg38.vcf.gz" -GNOMAD=file(params.gnomad) //somatic-hg38-af-only-gnomad.hg38.vcf.gz -PON=file(params.pon) -VEP_CACHEDIR=file(params.vep_cache) +GENOMEREF=file(params.genomes[params.genome].genome) +GENOMEFAI=file(params.genomes[params.genome].genomefai) +GENOMEDICT=file(params.genomes[params.genome].genomedict) +KGPGERMLINE=params.genomes[params.genome].kgp //1000G_phase1.snps.high_confidence.hg38.vcf.gz" +DBSNP=file(params.genomes[params.genome].dbsnp) //dbsnp_138.hg38.vcf.gz" 
+GNOMADGERMLINE=params.genomes[params.genome].gnomad //somatic-hg38-af-only-gnomad.hg38.vcf.gz +PON=file(params.genomes[params.genome].pon) +VEPCACHEDIR=file(params.genomes[params.genome].vepcache) +VEPSPECIES=params.genomes[params.genome].vepspecies +VEPBUILD=params.genomes[params.genome].vepbuild +SOMATIC_FOREST=params.genomes[params.genome].octopus_sforest +GERMLINE_FOREST=params.genomes[params.genome].octopus_gforest //Output outdir=file(params.output) @@ -15,6 +17,8 @@ outdir=file(params.output) process pileup_paired_tonly { + label 'process_highmem' + input: tuple val(tumorname), path(tumor), path(tumorbai), path(bed) @@ -27,7 +31,7 @@ process pileup_paired_tonly { """ gatk --java-options -Xmx48g GetPileupSummaries \ -I ${tumor} \ - -V ${KGP} \ + -V $KGPGERMLINE \ -L ${bed} \ -O ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table @@ -43,6 +47,7 @@ process pileup_paired_tonly { process contamination_tumoronly { + label 'process_highmem' publishDir(path: "${outdir}/vcfs/mutect2/", mode: 'copy') input: @@ -61,7 +66,7 @@ process contamination_tumoronly { """ gatk GatherPileupSummaries \ - --sequence-dictionary ${GENOMEDICT} \ + --sequence-dictionary $GENOMEDICT \ -I ${alltumor} -O ${tumorname}_allpileups.table gatk CalculateContamination \ @@ -81,6 +86,7 @@ process contamination_tumoronly { process learnreadorientationmodel_tonly { + label 'process_highmem' publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy') input: @@ -109,6 +115,7 @@ process learnreadorientationmodel_tonly { process mergemut2stats_tonly { + label 'process_low' publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy') input: @@ -136,7 +143,7 @@ process mergemut2stats_tonly { process mutect2_t_tonly { - + label 'process_somaticcaller' input: tuple val(tumorname), path(tumor), path(tumorbai), path(bed) @@ -150,12 +157,12 @@ process mutect2_t_tonly { """ gatk Mutect2 \ - --reference ${GENOME} \ + --reference $GENOMEREF \ --intervals ${bed} \ --input ${tumor} \ --tumor-sample 
${tumor.simpleName} \ - --germline-resource ${GNOMAD} \ - --panel-of-normals ${PON} \ + $GNOMADGERMLINE \ + --panel-of-normals $PON \ --output ${tumor.simpleName}_${bed.simpleName}.tonly.mut2.vcf.gz \ --f1r2-tar-gz ${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz \ --independent-mates @@ -174,13 +181,16 @@ process mutect2_t_tonly { process mutect2filter_tonly { + label 'process_mid' publishDir(path: "${outdir}/vcfs/mutect2_tonly", mode: 'copy') input: tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups),path(tumorcontamination) output: - tuple val(sample), path("${sample}.tonly.mut2.marked.vcf.gz"), - path("${sample}.tonly.mut2.norm.vcf.gz"), path("${sample}.tonly.marked.vcf.gz.filteringStats.tsv") + tuple val(sample), + path("${sample}.tonly.mut2.marked.vcf.gz"),path("${sample}.tonly.mut2.marked.vcf.gz.tbi"), + path("${sample}.tonly.mut2.norm.vcf.gz"),path("${sample}.tonly.mut2.norm.vcf.gz.tbi"), + path("${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv") script: //Include the stats and concat ${mutvcfs} -Oz -o ${sample}.concat.vcf.gz @@ -191,7 +201,7 @@ process mutect2filter_tonly { gatk GatherVcfs -I ${mut2in} -O ${sample}.tonly.concat.vcf.gz gatk IndexFeatureFile -I ${sample}.tonly.concat.vcf.gz gatk FilterMutectCalls \ - -R ${GENOME} \ + -R $GENOMEREF \ -V ${sample}.tonly.concat.vcf.gz \ --ob-priors ${obs} \ --contamination-table ${tumorcontamination} \ @@ -199,32 +209,31 @@ process mutect2filter_tonly { -O ${sample}.tonly.mut2.marked.vcf.gz gatk SelectVariants \ - -R ${GENOME} \ - --variant ${sample}.tonly.marked.vcf.gz \ + -R $GENOMEREF \ + --variant ${sample}.tonly.mut2.marked.vcf.gz \ --exclude-filtered \ --output ${sample}.tonly.mut2.final.vcf.gz - bcftools sort ${sample}.tonly.mut2.final.vcf.gz -@ 16 -Oz |\ - bcftools norm --threads 16 --check-ref s -f $GENOME -O v |\ + bcftools sort ${sample}.tonly.mut2.final.vcf.gz |\ + bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk 
'{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\t"; print}}' |\ - sed '/^\$/d' > ${sample}.tonly.mut2.norm.vcf.gz + sed '/^\$/d' |\ + bcftools view - -Oz -o ${sample}.tonly.mut2.norm.vcf.gz + bcftools index -t ${sample}.tonly.mut2.norm.vcf.gz """ stub: """ - touch ${sample}.tonly.mut2.marked.vcf.gz - touch ${sample}.tonly.mut2.norm.vcf.gz - touch ${sample}.tonly.marked.vcf.gz.filteringStats.tsv + touch ${sample}.tonly.mut2.marked.vcf.gz ${sample}.tonly.mut2.marked.vcf.gz.tbi + touch ${sample}.tonly.mut2.norm.vcf.gz ${sample}.tonly.mut2.norm.vcf.gz.tbi + touch ${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv """ } - - - - process varscan_tonly { + label 'process_somaticcaller' input: tuple val(tumorname), path(tumor), path(tumorbai), path(bed), @@ -232,50 +241,53 @@ process varscan_tonly { output: tuple val(tumorname), - path("${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf") + path("${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf.gz") shell: - """ + ''' varscan_opts="--strand-filter 0 --min-var-freq 0.01 --output-vcf 1 --variants 1" - pileup_cmd="samtools mpileup -d 100000 -q 15 -Q 15 -f !GENOME !{tumor}" + pileup_cmd="samtools mpileup -d 100000 -q 15 -Q 15 -f !{GENOMEREF} !{tumor}" varscan_cmd="varscan mpileup2cns <($pileup_cmd) $varscan_opts" + eval "$varscan_cmd > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf" - eval "$varscan_cmd > {output.vcf}.gz" - eval "bcftools view -U {output.vcf}.gz > {output.vcf}" - """ + printf "TUMOR\t!{tumorname}\n" > sampname + + bcftools reheader -s sampname !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf \ + | bcftools view -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf.gz + + ''' stub: - """ - touch ${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf - + touch ${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf.gz """ } + process vardict_tonly { - + label 'process_highcpu' input: tuple val(tumorname), path(tumor), path(tumorbai), path(bed) output: tuple val(tumorname), 
- path("${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf") + path("${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz") script: """ - VarDict -G $GENOME \ - -f 0.05 \ + bedtools makewindows -b ${bed} -w 50150 -s 50000 > temp_${bed} + + VarDict -G $GENOMEREF \ + -f 0.01 \ -x 500 \ --nosv \ - -b ${tumor} \ - -t -Q 20 -c 1 -S 2 -E 3 \ - -R ${bed} \ - | teststrandbias.R \ - | var2vcf_valid.pl \ + -b ${tumor} --fisher \ + -t -Q 20 -c 1 -S 2 -E 3 --th $task.cpus \ + temp_${bed} | var2vcf_valid.pl \ -N ${tumor} \ -Q 20 \ -d 10 \ @@ -284,25 +296,97 @@ process vardict_tonly { -E \ -f 0.05 > ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf + printf "${tumor.Name}\t${tumorname}\n" > sampname + + bcftools reheader -s sampname ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf \ + | bcftools view -Oz -o ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz + """ stub: """ - touch ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf + touch ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz + + """ + +} + + +process octopus_tonly { + //label 'process_highcpu' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), path(bed) + + output: + tuple val(tumorname), + path("${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz") + + script: """ + octopus -R $GENOMEREF -C cancer -I ${tumor} \ + --annotations AC AD DP \ + --target-working-memory 64Gb \ + -t ${bed} \ + $SOMATIC_FOREST \ + -o ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz --threads $task.cpus + """ + + stub: + + """ + touch ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz + """ } + +process somaticcombine_tonly { + label 'process_mid' + publishDir(path: "${outdir}/vcfs/combined_tonly", mode: 'copy') + + input: + tuple val(tumorsample), + val(callers), + path(vcfs), path(vcfindex) + + output: + tuple val(tumorsample), + path("${tumorsample}_combined_tonly.vcf.gz"), + path("${tumorsample}_combined_tonly.vcf.gz.tbi") + + script: + vcfin1=[callers, 
vcfs].transpose().collect { a, b -> a + " " + b } + vcfin2="-V:" + vcfin1.join(" -V:") + + """ + java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \ + -R $GENOMEREF \ + --genotypeMergeOption PRIORITIZE \ + --priority_list mutect2_tonly,octopus_tonly,vardict_tonly,varscan_tonly \ + --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \ + -O ${tumorsample}_combined_tonly.vcf.gz \ + $vcfin2 + """ + + stub: + """ + touch ${tumorsample}_combined_tonly.vcf.gz ${tumorsample}_combined_tonly.vcf.gz.tbi + """ + +} + process annotvep_tonly { publishDir("${outdir}/mafs", mode: "copy") input: tuple val(tumorsample), - val(vc), path(tumorvcf) + val(vc), path(tumorvcf), + path(vcfindex) output: @@ -310,19 +394,47 @@ process annotvep_tonly { shell: - """ + ''' + VCF_SAMPLE_IDS=($(bcftools query -l !{tumorvcf})) + TID_IDX=0 + NID_IDX="" + VCF_NID="" + NORM_VCF_ID_ARG="" + NSAMPLES=${#VCF_SAMPLE_IDS[@]} + if [ $NSAMPLES -gt 1 ]; then + # Assign tumor, normal IDs + # Look through column names and + # see if they match provided IDs + for (( i = 0; i < $NSAMPLES; i++ )); do + echo "${VCF_SAMPLE_IDS[$i]}" + if [ "${VCF_SAMPLE_IDS[$i]}" == !{tumorsample} ]; then + TID_IDX=$i + fi + + done + + if [ ! 
-z $NID_IDX ]; then + VCF_NID=${VCF_SAMPLE_IDS[$NID_IDX]} + NORM_VCF_ID_ARG="--vcf-normal-id $VCF_NID" + fi + fi + VCF_TID=${VCF_SAMPLE_IDS[$TID_IDX]} + + zcat !{tumorvcf} > !{tumorvcf.baseName} - zcat !{tumorvcf}.vcf.gz > !{tumorvcf}.vcf + mkdir -p tumor_only/!{vc} vcf2maf.pl \ - --vep-forks 16 --input-vcf !{tumorvcf}.vcf \ - --output-maf !{vc}/!{tumorsample}.tonly.maf \ + --vep-forks !{task.cpus} --input-vcf !{tumorvcf.baseName} \ + --output-maf tumor_only/!{vc}/!{tumorsample}.tonly.maf \ --tumor-id !{tumorsample} \ --vep-path /opt/vep/src/ensembl-vep \ - --vep-data $VEP_CACHEDIR \ - --ncbi-build GRCh38 --species homo_sapiens --ref-fasta !{GENOME} + --vep-data !{VEPCACHEDIR} \ + --ncbi-build !{VEPBUILD} --species !{VEPSPECIES} --ref-fasta !{GENOMEREF} \ + --vep-overwrite - """ + + ''' stub: """ @@ -332,6 +444,7 @@ process annotvep_tonly { } process combinemafs_tonly { + label 'process_low' publishDir(path: "${outdir}/mafs/tumor_only", mode: 'copy') input: From cfdfe4c40b82c948cbfa455f3b5a431c144a74a6 Mon Sep 17 00:00:00 2001 From: dnousome Date: Mon, 11 Dec 2023 15:16:51 -0500 Subject: [PATCH 05/58] fix: publish mode --- nextflow.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nextflow.config b/nextflow.config index 1581cff..1915606 100644 --- a/nextflow.config +++ b/nextflow.config @@ -61,6 +61,9 @@ params { fastq_input=null bam_input=null file_input=null + + publish_dir_mode = 'symlink' + } From 46091f75090dfeebc4fbcb346b3ae7b8b5adfde3 Mon Sep 17 00:00:00 2001 From: dnousome Date: Mon, 11 Dec 2023 15:20:50 -0500 Subject: [PATCH 06/58] fix: change output dir --- main.nf | 2 +- modules/local/copynumber.nf | 1 - modules/local/qc.nf | 2 -- modules/local/structural_variant.nf | 1 - modules/local/trim_align.nf | 1 - modules/local/variant_calling.nf | 2 -- modules/local/variant_calling_tonly.nf | 4 ---- nextflow.config | 1 + 8 files changed, 2 insertions(+), 12 deletions(-) diff --git a/main.nf b/main.nf index 4babf3a..e66c6ea 100644 --- a/main.nf +++ 
b/main.nf @@ -31,7 +31,7 @@ log.info """\ W G S S E E K P I P E L I N E ============================= genome: ${params.genome} - outdir: ${params.output} + outdir: ${params.outdir} Samplesheet: ${params.sample_sheet} Samples: ${params.fastq_input} ${params.file_input} ${params.bam_input} """ diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf index b15a8c2..392270e 100644 --- a/modules/local/copynumber.nf +++ b/modules/local/copynumber.nf @@ -24,7 +24,6 @@ HOTSPOTS='/data/SCLC-BRAINMETS/cn/variants/KnownHotspots.somatic.38.vcf.gz' //DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) //ascatR= -outdir=file(params.output) //mm10 Paired-Sequenza, FREEC-tumor only process seqz_sequenza_bychr { diff --git a/modules/local/qc.nf b/modules/local/qc.nf index 65515e8..dd2bdfa 100644 --- a/modules/local/qc.nf +++ b/modules/local/qc.nf @@ -16,8 +16,6 @@ SCRIPT_PATH_PCA = file(params.script_ancestry) //OUTPUT DIRECTORY -outdir=file(params.output) - process fc_lane { label 'process_low' publishDir("${outdir}/QC/fc_lane/", mode:'copy') diff --git a/modules/local/structural_variant.nf b/modules/local/structural_variant.nf index d807753..6182624 100644 --- a/modules/local/structural_variant.nf +++ b/modules/local/structural_variant.nf @@ -3,7 +3,6 @@ GENOME=params.genome BWAGENOME=file(params.genomes[params.genome].bwagenome) DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) -outdir=file(params.output) process svaba_somatic { diff --git a/modules/local/trim_align.nf b/modules/local/trim_align.nf index fefe243..19dfa81 100644 --- a/modules/local/trim_align.nf +++ b/modules/local/trim_align.nf @@ -1,6 +1,5 @@ GENOMEREF=file(params.genomes[params.genome].genome) KNOWNRECAL = params.genomes[params.genome].KNOWNRECAL -outdir=file(params.output) process fastp { diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index d7b3bf9..9d7892e 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ 
-12,8 +12,6 @@ SOMATIC_FOREST=params.genomes[params.genome].octopus_sforest GERMLINE_FOREST=params.genomes[params.genome].octopus_gforest LOFREQ_CONVERT=params.lofreq_convert -//Output -outdir=file(params.output) process mutect2 { diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf index 3d67e26..a252597 100644 --- a/modules/local/variant_calling_tonly.nf +++ b/modules/local/variant_calling_tonly.nf @@ -11,10 +11,6 @@ VEPBUILD=params.genomes[params.genome].vepbuild SOMATIC_FOREST=params.genomes[params.genome].octopus_sforest GERMLINE_FOREST=params.genomes[params.genome].octopus_gforest -//Output -outdir=file(params.output) - - process pileup_paired_tonly { label 'process_highmem' diff --git a/nextflow.config b/nextflow.config index 1915606..48b8d2c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -63,6 +63,7 @@ params { file_input=null publish_dir_mode = 'symlink' + outdir = 'results' } From f74061e7b33fa512a1640b1ac30c584d288ce9bb Mon Sep 17 00:00:00 2001 From: dnousome Date: Mon, 11 Dec 2023 15:36:53 -0500 Subject: [PATCH 07/58] fix: corrected the pipeline order --- main.nf | 156 +++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 115 insertions(+), 41 deletions(-) diff --git a/main.nf b/main.nf index e66c6ea..e1e7422 100644 --- a/main.nf +++ b/main.nf @@ -6,21 +6,36 @@ date = new Date().format( 'yyyyMMdd' ) //SUB WORKFLOWS to SPLIT PIPE_ALIGN=params.PIPE_ALIGN -PIPE_GERMLINE=params.PIPE_GERMLINE + PIPE_VC=params.PIPE_VC PIPE_SV=params.PIPE_SV -PIPE_QC=params.PIPE_QC -PIPE_BAMVC=params.PIPE_BAMVC +PIPE_CNV=params.PIPE_CNV + +PIPE_QC_GL=params.PIPE_QC_GL +PIPE_QC_NOGL=params.PIPE_QC_NOGL + +PIPE_GL=params.PIPE_GL + PIPE_TONLY_ALIGN=params.PIPE_TONLY_ALIGN PIPE_TONLY_VC=params.PIPE_TONLY_VC -PIPE_TONLY_BAMVC=params.PIPE_TONLY_BAMVC +PIPE_TONLY_SV=params.PIPE_TONLY_SV +PIPE_TONLY_CNV=params.PIPE_TONLY_CNV PIPE_TONLY_QC=params.PIPE_TONLY_QC +PIPE_BAMVC=params.PIPE_BAMVC +PIPE_BAMSV=params.PIPE_BAMCNV 
+PIPE_BAMCNV=params.PIPE_BAMCNV + +PIPE_TONLY_BAMVC=params.PIPE_TONLY_BAMVC +PIPE_TONLY_BAMSV=params.PIPE_TONLY_BAMSV +PIPE_TONLY_BAMCNV=params.PIPE_TONLY_BAMCNV + + -include {INPUT_PIPE;TRIM_ALIGN_PIPE; - GERMLINE_PIPE;VARIANTCALL_PIPE;INPUT_BAMVC_PIPE;SV_PIPE; - QC_PIPE} from "./subworkflows/local/workflows.nf" +include {INPUT; ALIGN; GL; + VC; INPUT_BAM; SV; CNVmouse; CNVhuman; + QC_GL; QC_NOGL} from "./subworkflows/local/workflows.nf" include {INPUT_TONLY; INPUT_TONLY_BAM; ALIGN_TONLY; @@ -46,69 +61,128 @@ workflow.onComplete { } } +//Final Workflow //Final Workflow workflow { if (PIPE_ALIGN){ - INPUT_PIPE() - TRIM_ALIGN_PIPE(INPUT_PIPE.out.fastqinput,INPUT_PIPE.out.sample_sheet) + INPUT() + ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) } - - //GermlineVC - if (PIPE_GERMLINE){ - INPUT_PIPE() - TRIM_ALIGN_PIPE(INPUT_PIPE.out.fastqinput,INPUT_PIPE.out.sample_sheet) - GERMLINE_PIPE(TRIM_ALIGN_PIPE.out.bambyinterval) + //Germline + if (PIPE_GL){ + INPUT() + ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) + GL(ALIGN.out.bambyinterval) } //Tumor-Normal Pipelines if (PIPE_VC){ - INPUT_PIPE() - TRIM_ALIGN_PIPE(INPUT_PIPE.out.fastqinput,INPUT_PIPE.out.sample_sheet) - VARIANTCALL_PIPE(TRIM_ALIGN_PIPE.out.bamwithsample,TRIM_ALIGN_PIPE.out.splitout,TRIM_ALIGN_PIPE.out.sample_sheet) + INPUT() + ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) + VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet) } - if (PIPE_QC){ - INPUT_PIPE() - TRIM_ALIGN_PIPE(INPUT_PIPE.out.fastqinput,INPUT_PIPE.out.sample_sheet) - GERMLINE_PIPE(TRIM_ALIGN_PIPE.out.bambyinterval) - QC_PIPE(TRIM_ALIGN_PIPE.out.fastqin,TRIM_ALIGN_PIPE.out.fastpout,TRIM_ALIGN_PIPE.out.bwamem2out,GERMLINE_PIPE.out.glnexusout,GERMLINE_PIPE.out.bcfout) - + if (PIPE_QC_GL){ + INPUT() + ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) + GL(ALIGN.out.bambyinterval) + QC_GL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout,GL.out.glnexusout,GL.out.bcfout) + } + if (PIPE_QC_NOGL){ + INPUT() + 
ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) + QC_NOGL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout) } if (PIPE_SV){ - INPUT_PIPE() - TRIM_ALIGN_PIPE(INPUT_PIPE.out.fastqinput,INPUT_PIPE.out.sample_sheet) - SV_PIPE(TRIM_ALIGN_PIPE.out.bamwithsample) + INPUT() + ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) + SV(ALIGN.out.bamwithsample) + } + if (PIPE_CNV){ + INPUT() + ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) + if (params.genome == "mm10"){ + CNVmouse(ALIGN.out.bamwithsample) + } else if (params.genome== "hg38"){ + VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet) + CNVhuman(ALIGN.out.bamwithsample,VC.out.somaticcall_input) + } } if (PIPE_BAMVC){ - INPUT_BAMVC_PIPE() - VARIANTCALL_PIPE(INPUT_BAMVC_PIPE.out.bamwithsample,INPUT_BAMVC_PIPE.out.splitout,INPUT_BAMVC_PIPE.out.sample_sheet) + INPUT_BAM() + VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet) + } + if (PIPE_BAMSV){ + INPUT_BAM() + SV(INPUT_BAM.out.bamwithsample) + } + if (PIPE_BAMCNV){ + INPUT_BAM() + if (params.genome == "mm10"){ + CNVmouse(INPUT_BAM.out.bamwithsample) + } else if (params.genome== "hg38"){ + VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet) + CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input) + + } } ///Tumor Only Pipelines if (PIPE_TONLY_ALIGN){ - INPUT_TONLY_PIPE() - TRIM_ALIGN_TONLY_PIPE(INPUT_TONLY_PIPE.out.fastqinput,INPUT_TONLY_PIPE.out.sample_sheet) + INPUT_TONLY() + ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet) } if (PIPE_TONLY_VC){ - INPUT_TONLY_PIPE() - TRIM_ALIGN_TONLY_PIPE(INPUT_TONLY_PIPE.out.fastqinput,INPUT_TONLY_PIPE.out.sample_sheet) - VARIANT_TONLY_PIPE(TRIM_ALIGN_TONLY_PIPE.out.bamwithsample,TRIM_ALIGN_TONLY_PIPE.out.splitout,TRIM_ALIGN_TONLY_PIPE.out.sample_sheet) + INPUT_TONLY() + ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet) + 
VC_TONLY(ALIGN_TONLY.out.bamwithsample,ALIGN_TONLY.out.splitout,ALIGN_TONLY.out.sample_sheet) } - if (PIPE_TONLY_QC){ - INPUT_TONLY_PIPE() - TRIM_ALIGN_TONLY_PIPE(INPUT_TONLY_PIPE.out.fastqinput,INPUT_TONLY_PIPE.out.sample_sheet) - QC_TONLY_PIPE(TRIM_ALIGN_TONLY_PIPE.out.fastqin,TRIM_ALIGN_TONLY_PIPE.out.fastpout,TRIM_ALIGN_TONLY_PIPE.out.bqsrout) + if (PIPE_TONLY_SV){ + INPUT_TONLY() + ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet) + SV_TONLY(ALIGN_TONLY.out.bamwithsample) + } + if (PIPE_TONLY_CNV){ + INPUT_TONLY() + ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet) + if (params.genome == "mm10"){ + CNVmouse_tonly(ALIGN_TONLY.out.bamwithsample) + } else if (params.genome== "hg38"){ + VC_TONLY(ALIGN_TONLY.out.bamwithsample,ALIGN_TONLY.out.splitout,ALIGN_TONLY.out.sample_sheet) + CNVhuman_tonly(ALIGN_TONLY.out.bamwithsample,VC_TONLY.out.somaticcall_input) + } } - //Variant Calling from BAM only/Tumor Only + if (PIPE_TONLY_QC){ + INPUT_TONLY() + ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet) + QC_TONLY(ALIGN_TONLY.out.fastqin,ALIGN_TONLY.out.fastpout,ALIGN_TONLY.out.bqsrout) + + } + //Variant Calling from BAM-Tumor Only Mode if (PIPE_TONLY_BAMVC){ - INPUT_TONLY_BAMVC_PIPE() - VARIANT_TONLY_PIPE(INPUT_TONLY_BAMVC_PIPE.out.bamwithsample,INPUT_TONLY_BAMVC_PIPE.out.splitout,INPUT_TONLY_BAMVC_PIPE.out.sample_sheet) + INPUT_TONLY_BAM() + VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet) + } + if (PIPE_TONLY_BAMSV){ + INPUT_TONLY_BAM() + SV_TONLY(INPUT_TONLY_BAM.out.bamwithsample) + } + if (PIPE_TONLY_BAMCNV){ + INPUT_TONLY_BAM() + if (params.genome == "mm10"){ + CNVmouse_tonly(INPUT_TONLY_BAM.out.bamwithsample) + }else if (params.genome== "hg38"){ + VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet) + CNVhuman_tonly(INPUT_TONLY_BAM.out.bamwithsample,VC_TONLY.out.somaticcall_input) + + } } } + From 
4c045cdc4c22b7d22ec3518bcf551b8ded3dee0e Mon Sep 17 00:00:00 2001 From: dnousome Date: Mon, 11 Dec 2023 15:40:03 -0500 Subject: [PATCH 08/58] fix: output rename --- modules/local/germline.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/local/germline.nf b/modules/local/germline.nf index d3544a5..6896f68 100644 --- a/modules/local/germline.nf +++ b/modules/local/germline.nf @@ -1,8 +1,6 @@ GENOMEREF=file(params.genomes[params.genome].genome) MODEL="/opt/models/wgs/model.ckpt" -//Output Directory -outdir=file(params.output) //Processes //Deep Variant From 1431f64187f121c5b0f14c44af5718ffd84baf1c Mon Sep 17 00:00:00 2001 From: dnousome Date: Mon, 11 Dec 2023 15:40:45 -0500 Subject: [PATCH 09/58] fix: copy number --- subworkflows/local/workflows.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index 96713b5..d9dab2d 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -46,7 +46,7 @@ include {svaba_somatic; manta_somatic; annotsv_tn as annotsv_svaba;annotsv_tn as annotsv_manta} from '../../modules/local/structural_variant.nf' include {amber_tn; cobalt_tn; purple; - sequenza; seqz_sequenza_bychr; freec; freec_paired } from './copynumber.nf' + sequenza; seqz_sequenza_bychr; freec; freec_paired } from '../../modules/local/copynumber.nf' include {splitinterval} from '../../modules/local/splitbed.nf' From 38465d6f2becdfd54ddd384f23f904142197b5b1 Mon Sep 17 00:00:00 2001 From: dnousome Date: Mon, 11 Dec 2023 15:41:56 -0500 Subject: [PATCH 10/58] fix: sv location --- subworkflows/local/workflows_tonly.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf index 07c274c..ca5eb5f 100644 --- a/subworkflows/local/workflows_tonly.nf +++ b/subworkflows/local/workflows_tonly.nf @@ -41,9 +41,9 @@ include {mutect2_t_tonly; mutect2filter_tonly; 
pileup_paired_tonly; include {manta_tonly; svaba_tonly; survivor_sv; gunzip; annotsv_tonly as annotsv_manta_tonly; annotsv_tonly as annotsv_svaba_tonly; -annotsv_tonly as annotsv_survivor_tonly} from './structural_variant.nf' +annotsv_tonly as annotsv_survivor_tonly} from '../../modules/local/structural_variant.nf' -include {freec; amber_tonly; cobalt_tonly; purple } from './copynumber.nf' +include {freec; amber_tonly; cobalt_tonly; purple } from '../../modules/local/copynumber.nf' include {splitinterval} from '../../modules/local/splitbed.nf' From e1675b79c6142da256fdab829d008bd8d6508947 Mon Sep 17 00:00:00 2001 From: dnousome Date: Mon, 11 Dec 2023 15:43:35 -0500 Subject: [PATCH 11/58] fix: hg38 location --- conf/genomes.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/genomes.config b/conf/genomes.config index a7810a5..2ee6cdc 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -6,7 +6,7 @@ params { bwagenome= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.fasta" genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict" wgsregion = "/data/nousomedr/annotation/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" - intervals="${projectDir}/workflow/resources/hg38_v0_wgs_calling_regions.hg38.bed" + intervals= "${projectDir}/assets/hg38_v0_wgs_calling_regions.hg38.bed" //millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //shapeitindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) // KNOWNINDELS= 
'/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_indel.vcf' From b640a4534edab083199a5c12302551afc000db26 Mon Sep 17 00:00:00 2001 From: dnousome Date: Mon, 11 Dec 2023 15:47:15 -0500 Subject: [PATCH 12/58] refactor: moved and all linked scripts --- nextflow.config | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/nextflow.config b/nextflow.config index 48b8d2c..c3b84d4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,19 +16,20 @@ includeConfig 'conf/containers.config' params { - fastq_screen_conf= "${projectDir}/workflow/resources/fastq_screen.conf" - get_flowcell_lanes="${projectDir}/workflow/scripts/flowcell_lane.py" - splitbed="${projectDir}/workflow/resources/split_Bed_into_equal_regions.py" + fastq_screen_conf = "${projectDir}/conf/fastq_screen.conf" + get_flowcell_lanes = "${projectDir}/bin/scripts/flowcell_lane.py" + splitbed= "${projectDir}/bin/split_Bed_into_equal_regions.py" + script_genderPrediction = "${projectDir}/bin/RScripts/predictGender.R" + script_combineSamples = "${projectDir}/bin/combineAllSampleCompareResults.R" + script_ancestry = "${projectDir}/bin/sampleCompareAncestoryPlots.R" + script_sequenza = "${projectDir}/bin/run_sequenza.R" + script_freec = "${projectDir}/bin/make_freec_genome.pl" + script_freecpaired = "${projectDir}/bin/freec_paired.pl" + freec_significance = "${projectDir}/bin/assess_significance.R" + freec_plot = "${projectDir}/bin/makeGraph.R" + lofreq_convert = "${projectDir}/bin/add_gt_lofreq.sh" split_regions = "36" //Number of regions to split by - script_genderPrediction = "${projectDir}/workflow/scripts/RScripts/predictGender.R" - script_combineSamples = "${projectDir}/workflow/scripts/RScripts/combineAllSampleCompareResults.R" - script_ancestry = "${projectDir}/workflow/scripts/RScripts/sampleCompareAncestoryPlots.R" - script_sequenza = "${projectDir}/workflow/scripts/RScripts/run_sequenza.R" - script_freec = 
"${projectDir}/workflow/scripts/make_freec_genome.pl" - script_freecpaired = "${projectDir}/workflow/scripts/freec_paired.pl" - freec_significance = "${projectDir}/workflow/scripts/assess_significance.R" - freec_plot = "${projectDir}/workflow/scripts/makeGraph.R" - lofreq_convert = "${projectDir}/workflow/scripts/add_gt_lofreq.sh" + vep_cache = "/fdb/VEP/102/cache" //SUB WORKFLOWS to SPLIT From 4dbed734e2299365c42f78089986ed2b7b7a3a26 Mon Sep 17 00:00:00 2001 From: dnousome Date: Mon, 11 Dec 2023 15:49:18 -0500 Subject: [PATCH 13/58] fix: disable singularity for stub --- conf/ci_stub.config | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/conf/ci_stub.config b/conf/ci_stub.config index 6273277..0fc4817 100644 --- a/conf/ci_stub.config +++ b/conf/ci_stub.config @@ -15,4 +15,8 @@ params { process { cpus = 1 memory = '1.GB' + + singularity { + enabled = false + } } From c0354b1c55d32e77d48e3b6364f8681ee06d9667 Mon Sep 17 00:00:00 2001 From: dnousome Date: Mon, 11 Dec 2023 15:53:46 -0500 Subject: [PATCH 14/58] fix: lower mem for stub --- conf/ci_stub.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/ci_stub.config b/conf/ci_stub.config index 0fc4817..808f53f 100644 --- a/conf/ci_stub.config +++ b/conf/ci_stub.config @@ -6,7 +6,7 @@ params { outdir = 'results/test' max_cpus = 2 // for GitHub Actions https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources - max_memory = '6.GB' + max_memory = '4.GB' max_time = '6.h' publish_dir_mode = "symlink" From defaf906fe851f8bfc5e01d18245942e41cce127 Mon Sep 17 00:00:00 2001 From: dnousome Date: Mon, 11 Dec 2023 16:57:41 -0500 Subject: [PATCH 15/58] refactor: moved publishdir to module.config --- conf/modules.config | 280 +++++++++++++++++++++++++ modules/local/copynumber.nf | 7 - modules/local/germline.nf | 10 +- modules/local/qc.nf | 32 +-- modules/local/structural_variant.nf | 7 - modules/local/trim_align.nf | 4 +- 
modules/local/variant_calling.nf | 13 -- modules/local/variant_calling_tonly.nf | 5 - 8 files changed, 285 insertions(+), 73 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index ff3e484..3c8a778 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -8,4 +8,284 @@ process { errorStrategy = 'finish' + withName: sequenza { + publishDir = [ + path: { "${params.outdir}/cnv/sequenza" }, + mode: 'copy' + ] + } + + + withName: freec_paired { + publishDir = [ + path: { "${params.outdir}/cnv/freec" }, + mode: 'copy' + ] + } + + withName: freec { + publishDir = [ + path: { "${params.outdir}/cnv/freec" }, + mode: 'copy' + ] + } + + withName: 'amber_tonly|amber_tn' { + publishDir = [ + path: { "${params.outdir}/cnv/amber" }, + mode: 'copy' + ] + } + + withName: 'cobalt_tonly|cobalt_tn' { + publishDir = [ + path: { "${params.outdir}/cnv/cobalt" }, + mode: 'copy' + ] + } + + withName: 'purple' { + publishDir = [ + path: { "${params.outdir}/cnv/purple" }, + mode: 'copy' + ] + } + + withName: 'deepvariant_step3|deepvariant_combined|glnexus' { + publishDir = [ + path: { "${params.outdir}/germline/deepvariant" }, + mode: 'copy' + ] + module=['deepvariant/1.4.0'] + } + + withName: 'deepvariant_step1|deepvariant_step2' { + module = ['deepvariant/1.4.0'] + } + + withName: 'fc_lane' { + publishDir = [ + path: { "${params.outdir}/QC/fc_lane" }, + mode: 'copy' + ] + } + + withName: 'fastq_screen' { + publishDir = [ + path: { "${params.outdir}/QC/fastq_screen" }, + mode: 'copy' + ] + } + + withName: 'kraken' { + publishDir = [ + path: { "${params.outdir}/QC/kraken" }, + mode: 'copy' + ] + } + + + withName: 'fastqc' { + publishDir = [ + path: { "${params.outdir}/QC/fastqc" }, + mode: 'copy' + ] + } + + withName: 'qualimap|qualimap_bamqc' { + publishDir = [ + path: { "${params.outdir}/QC/qualimap" }, + mode: 'copy' + ] + } + + withName: 'samtools_flagstats' { + publishDir = [ + path: { "${params.outdir}/QC/samtools_flagstats" }, + mode: 'copy' + ] + } + + 
withName: 'mosdepth' { + publishDir = [ + path: { "${params.outdir}/QC/mosdepth" }, + mode: 'copy' + ] + } + + withName: 'vcftools' { + publishDir = [ + path: { "${params.outdir}/QC/vcftools" }, + mode: 'copy' + ] + } + + withName: 'collectvariantcallmetrics' { + publishDir = [ + path: { "${params.outdir}/QC/collectvariantcallmetrics" }, + mode: 'copy' + ] + } + + withName: 'bcftools_stats' { + publishDir = [ + path: { "${params.outdir}/QC/bcftools_stat" }, + mode: 'copy' + ] + } + + withName: 'gatk_varianteval' { + publishDir = [ + path: { "${params.outdir}/QC/gatk_varianteval" }, + mode: 'copy' + ] + } + + withName: 'snpeff' { + publishDir = [ + path: { "${params.outdir}/QC/snpeff" }, + mode: 'copy' + ] + } + + withName: 'somalier_extract|somalier_analysis_human|somalier_analysis_mouse' { + publishDir = [ + path: { "${params.outdir}/QC/somalier" }, + mode: 'copy' + ] + } + + withName: 'multiqc' { + publishDir = [ + path: { "${params.outdir}/QC/multiqc" }, + mode: 'copy' + ] + } + + withName: 'svaba_somatic' { + publishDir = [ + path: { "${params.outdir}/SV/svaba" }, + mode: 'copy' + ] + } + + withName: 'svaba_tonly' { + publishDir = [ + path: { "${params.outdir}/SV/svaba_tonly" }, + mode: 'copy' + ] + } + + withName: 'manta_somatic' { + publishDir = [ + path: { "${params.outdir}/SV/manta" }, + mode: 'copy' + ] + } + + withName: 'manta_tonly' { + publishDir = [ + path: { "${params.outdir}/SV/manta_tonly" }, + mode: 'copy' + ] + } + + withName: 'annotsv_tn' { + publishDir = [ + path: { "${params.outdir}/SV/annotated" }, + mode: 'copy' + ] + } + + withName: 'annotsv_tonly' { + publishDir = [ + path: { "${params.outdir}/SV/annotated_tonly" }, + mode: 'copy' + ] + } + + withName: 'survivor_sv' { + publishDir = [ + path: { "${params.outdir}/SV/survivor" }, + mode: 'copy' + ] + } + + withName: 'fastp' { + publishDir = [ + path: { "${params.outdir}/QC/fastp" }, + mode: 'copy', + pattern: '{*fastp.json,*fastp.html}' + ] + } + + withName: 'applybqsr|samtoolsindex' { + 
publishDir = [ + path: { "${params.outdir}/bams/BQSR" }, + mode: 'copy' + ] + } + + withName: 'contamination_tumoronly|learnreadorientationmodel_tonly|learnreadorientationmodel|mergemut2stats|mergemut2stats_tonly|contamination_paired|mutect2filter' { + publishDir = [ + path: { "${params.outdir}/vcfs/mutect2" }, + mode: 'copy' + ] + } + + withName: 'mutect2filter_tonly' { + publishDir = [ + path: { "${params.outdir}/vcfs/mutect2_tonly" }, + mode: 'copy' + ] + } + + withName: 'annotvep_tonly|annotvep_tn' { + publishDir = [ + path: { "${params.outdir}/mafs" }, + mode: 'copy' + ] + } + + withName: 'combinemafs_tonly' { + publishDir = [ + path: { "${params.outdir}/mafs/tumor_only" }, + mode: 'copy' + ] + } + + withName: 'combinemafs_tn' { + publishDir = [ + path: { "${params.outdir}/mafs/paired" }, + mode: 'copy' + ] + } + + withName: 'combineVariants|combineVariants_alternative' { + publishDir = [ + path: { "${params.outdir}/vcfs" }, + mode: 'copy' + ] + } + + withName: 'combineVariants_strelka' { + publishDir = [ + path: { "${params.outdir}/vcfs/strelka" }, + mode: 'copy' + ] + } + + withName: 'somaticcombine_tonly' { + publishDir = [ + path: { "${params.outdir}/vcfs/combined_tonly" }, + mode: 'copy' + ] + } + + withName: 'somaticcombine' { + publishDir = [ + path: { "${params.outdir}/vcfs/combined" }, + mode: 'copy' + ] + } } diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf index 392270e..757465a 100644 --- a/modules/local/copynumber.nf +++ b/modules/local/copynumber.nf @@ -58,7 +58,6 @@ process seqz_sequenza_bychr { process sequenza { label 'process_highcpu' - publishDir("${outdir}/cnv/sequenza", mode: 'copy') input: tuple val(pairid), path(seqz) @@ -124,7 +123,6 @@ process sequenza { process freec_paired { label 'process_highcpu' - publishDir("${outdir}/cnv/freec", mode: 'copy') input: tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai) @@ -171,7 +169,6 @@ process freec_paired { process freec { 
label 'process_mid' - publishDir("${outdir}/cnv/freec", mode: 'copy') input: tuple val(tumorname), path(tumor), path(tumorbai) @@ -217,7 +214,6 @@ process freec { process amber_tonly { label 'process_mid' - publishDir("${outdir}/cnv/amber", mode: 'copy') input: tuple val(tumorname), path(tumor), path(tumorbai) @@ -253,7 +249,6 @@ process amber_tonly { process amber_tn { label 'process_mid' - publishDir("${outdir}/cnv/amber", mode: 'copy') input: tuple val(tumorname), path(tumor), path(tumorbai), @@ -290,7 +285,6 @@ process amber_tn { process cobalt_tonly { label "process_mid" - publishDir("${outdir}/cnv/cobalt", mode: 'copy') input: tuple val(tumorname), path(tumor), path(tumorbai) @@ -324,7 +318,6 @@ process cobalt_tonly { process cobalt_tn { label "process_mid" - publishDir("${outdir}/cnv/cobalt", mode: 'copy') input: tuple val(tumorname), path(tumor), path(tumorbai), diff --git a/modules/local/germline.nf b/modules/local/germline.nf index 6896f68..c106683 100644 --- a/modules/local/germline.nf +++ b/modules/local/germline.nf @@ -5,7 +5,6 @@ MODEL="/opt/models/wgs/model.ckpt" //Processes //Deep Variant process deepvariant_step1 { - module=['deepvariant/1.4.0'] input: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path(bed) @@ -41,7 +40,6 @@ process deepvariant_step1 { //Step 2 requires GPU process deepvariant_step2 { - module=['deepvariant/1.4.0'] input: tuple val(samplename), path(tfrecords), path(tfgvcf) @@ -70,9 +68,7 @@ process deepvariant_step2 { //Step 3 DV process deepvariant_step3 { - publishDir("${outdir}/deepvariant", mode: 'copy') - module=['deepvariant/1.4.0'] input: tuple val(samplename), path(tfrecords), path("${samplename}_call_variants_output.tfrecord.gz"), @@ -104,9 +100,7 @@ process deepvariant_step3 { //Combined DeepVariant process deepvariant_combined { - module=['deepvariant/1.4.0'] - publishDir("${outdir}/deepvariant", mode: 'copy') input: tuple val(samplename), path("${samplename}.bam"), 
path("${samplename}.bai") @@ -140,9 +134,7 @@ process deepvariant_combined { process glnexus { - module=['glnexus','bcftools'] - - publishDir("${outdir}/deepvariant", mode: 'copy') + input: path(gvcfs) diff --git a/modules/local/qc.nf b/modules/local/qc.nf index dd2bdfa..82bcc1a 100644 --- a/modules/local/qc.nf +++ b/modules/local/qc.nf @@ -18,7 +18,6 @@ SCRIPT_PATH_PCA = file(params.script_ancestry) //OUTPUT DIRECTORY process fc_lane { label 'process_low' - publishDir("${outdir}/QC/fc_lane/", mode:'copy') input: tuple val(samplename), path(fqs) @@ -46,7 +45,6 @@ process fc_lane { process fastq_screen { //Uses Trimmed Files - publishDir(path: "${outdir}/QC/fastq_screen/", mode:'copy') input: tuple val(samplename), @@ -96,7 +94,6 @@ process kraken { @Output: Kraken logfile and interative krona report */ - publishDir(path: "${outdir}/QC/kraken/", mode: 'copy') input: tuple val(samplename), @@ -148,7 +145,6 @@ process fastqc { FastQC report and zip file containing sequencing quality information """ - publishDir(path: "${outdir}/QC/fastqc/", mode: 'copy') input: tuple val(samplename), path("${samplename}.bqsr.bam"), path("${samplename}.bqsr.bai") @@ -186,12 +182,8 @@ process qualimap_bamqc { Recalibrated BAM file (scatter) @Output: Report containing post-aligment quality-control metrics - */ - publishDir("${outdir}/QC/qualimap/", mode: "copy") - - //module=['qualimap/2.2.1','java/12.0.1'] - //module: config['images']['qualimap'] - + */ + input: tuple val(samplename), path(bam), path(bai) @@ -233,8 +225,6 @@ process samtools_flagstats { Text file containing alignment statistics */ label 'process_mid' - - publishDir("${outdir}/QC/flagstats/", mode: "copy") input: tuple val(samplename), path(bam), path(bai) @@ -268,9 +258,6 @@ process mosdepth { `{prefix}.quantized.bed.gz` (if --quantize is specified) `{prefix}.thresholds.bed.gz` (if --thresholds is specified) */ - - publishDir("${outdir}/QC/mosdepth/", mode: "copy") - input: tuple val(samplename), path(bam), path(bai) @@ 
-309,7 +296,6 @@ process vcftools { */ label 'process_mid' - publishDir(path:"${outdir}/QC/vcftools", mode: 'copy') input: tuple path(germlinevcf),path(germlinetbi) @@ -338,9 +324,7 @@ process collectvariantcallmetrics { Multi-sample gVCF file (indirect-gather-due-to-aggregation) @Output: Text file containing a collection of metrics relating to snps and indels - */ - publishDir("${outdir}/QC/variantmetrics", mode: 'copy') - + */ input: tuple path(germlinevcf),path(germlinetbi) @@ -381,7 +365,6 @@ process bcftools_stats { */ label 'process_mid' - publishDir("${outdir}/QC/bcftoolsstat", mode: 'copy') input: tuple val(samplename), path("${samplename}.gvcf.gz"),path("${samplename}.gvcf.gz.tbi") @@ -415,8 +398,6 @@ process gatk_varianteval { */ label 'process_mid' - publishDir("${outdir}/QC/gatk_varianteval", mode: 'copy') - input: tuple val(samplename), path("${samplename}.gvcf.gz") ,path("${samplename}.gvcf.gz.tbi") output: @@ -458,7 +439,6 @@ process snpeff { Evaluation table containing a collection of summary statistics */ label 'process_mid' - publishDir("${outdir}/QC/snpeff", mode: 'copy') input: tuple val(samplename), path("${samplename}.gvcf.gz"), path("${samplename}.gvcf.gz.tbi") @@ -495,7 +475,6 @@ process somalier_extract { Exracted sites in (binary) somalier format */ label 'process_low' - publishDir("${outdir}/QC/somalier", mode: 'copy') input: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") @@ -536,7 +515,6 @@ process somalier_analysis_human { */ label 'process_low' - publishDir("${outdir}/QC/somalier", mode: 'copy') input: path(somalierin) @@ -601,8 +579,6 @@ process somalier_analysis_mouse { */ label 'process_low' - publishDir("${outdir}/QC/somalier", mode: 'copy') - input: path(somalierin) @@ -653,8 +629,6 @@ process multiqc { @Output: Interactive MulitQC report and a QC metadata table """ - - publishDir("${outdir}/QC/multiqc", mode: 'copy') input: path(allqcin) diff --git a/modules/local/structural_variant.nf 
b/modules/local/structural_variant.nf index 6182624..a6f58f4 100644 --- a/modules/local/structural_variant.nf +++ b/modules/local/structural_variant.nf @@ -8,8 +8,6 @@ DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) process svaba_somatic { label 'process_highcpu' - publishDir(path: "${outdir}/SV/svaba", mode: 'copy') - input: tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai) @@ -60,7 +58,6 @@ process svaba_somatic { process manta_somatic { label 'process_highcpu' - publishDir(path: "${outdir}/SV/manta", mode: 'copy') input: tuple val(tumorname), path(tumor), path(tumorbai),val(normalname), path(normal), path(normalbai) @@ -107,7 +104,6 @@ process annotsv_tn { //Requires bedtools,bcftools module = ['annotsv/3.3.1'] - publishDir(path: "${outdir}/SV/annotated", mode: 'copy') input: tuple val(tumorname), path(somaticvcf), val(sv) @@ -141,7 +137,6 @@ process annotsv_tn { process manta_tonly { label 'process_highcpu' - publishDir(path: "${outdir}/SV/manta_tonly", mode: 'copy') input: tuple val(tumorname), path(tumor), path(tumorbai) @@ -184,7 +179,6 @@ process manta_tonly { process svaba_tonly { label 'process_highcpu' - publishDir(path: "${outdir}/SV/svaba_tonly", mode: 'copy') input: tuple val(tumorname), path(tumor), path(tumorbai) @@ -250,7 +244,6 @@ process gunzip { process survivor_sv { module = ['survivor'] - publishDir(path: "${outdir}/SV/survivor", mode: 'copy') input: tuple val(tumorname), diff --git a/modules/local/trim_align.nf b/modules/local/trim_align.nf index 19dfa81..4fa34db 100644 --- a/modules/local/trim_align.nf +++ b/modules/local/trim_align.nf @@ -5,7 +5,6 @@ KNOWNRECAL = params.genomes[params.genome].KNOWNRECAL process fastp { label 'process_mid' tag { name } - publishDir(path: "${outdir}/QC/fastp", mode: 'copy', pattern: '{*fastp.json,*fastp.html}') input: tuple val(samplename), path(fqs) @@ -135,8 +134,7 @@ process applybqsr { Base quality recalibration for all samples to */ label 
'process_low' - publishDir(path: "${outdir}/bams/BQSR", mode: 'copy') - + input: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path("${samplename}.recal_data.grp") diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index 9d7892e..f9fdff2 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ -115,8 +115,6 @@ process pileup_paired_n { process contamination_paired { label 'process_highmem' - publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy') - input: tuple val(tumorname), path(tumor_pileups), @@ -170,8 +168,6 @@ process contamination_paired { process learnreadorientationmodel { label 'process_highmem' - publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy') - input: tuple val(sample), path(f1r2) @@ -197,8 +193,6 @@ process learnreadorientationmodel { process mergemut2stats { label 'process_low' - publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy') - input: tuple val(sample), path(stats) @@ -225,8 +219,6 @@ process mergemut2stats { process mutect2filter { label 'process_mid' - publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy') - input: tuple val(tumor), val(normal),path(mutvcfs), path(stats), path(obs), path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination) @@ -556,7 +548,6 @@ process muse_tn { process combineVariants { label 'process_highmem' - publishDir(path: "${outdir}/vcfs/", mode: 'copy') input: tuple val(sample), path(inputvcf), val(vc) @@ -606,7 +597,6 @@ process combineVariants { process combineVariants_alternative { label 'process_highmem' - publishDir(path: "${outdir}/vcfs/", mode: 'copy') input: tuple val(sample), path(vcfs), path(vcfsindex), val(vc) @@ -681,7 +671,6 @@ process bcftools_index_octopus { process combineVariants_strelka { //Concat all somatic snvs/indels across all files, strelka separates snv/indels label 'process_mid' - publishDir(path: "${outdir}/vcfs/strelka", mode: 'copy') input: tuple 
val(sample), @@ -727,7 +716,6 @@ process combineVariants_strelka { process somaticcombine { label 'process_mid' - publishDir(path: "${outdir}/vcfs/combined", mode: 'copy') input: tuple val(tumorsample), val(normal), @@ -766,7 +754,6 @@ process somaticcombine { process annotvep_tn { - publishDir(path: "${outdir}/mafs/", mode: 'copy') input: tuple val(tumorsample), val(normalsample), diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf index a252597..16e6e51 100644 --- a/modules/local/variant_calling_tonly.nf +++ b/modules/local/variant_calling_tonly.nf @@ -44,7 +44,6 @@ process pileup_paired_tonly { process contamination_tumoronly { label 'process_highmem' - publishDir(path: "${outdir}/vcfs/mutect2/", mode: 'copy') input: tuple val(tumorname), @@ -83,7 +82,6 @@ process contamination_tumoronly { process learnreadorientationmodel_tonly { label 'process_highmem' - publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy') input: tuple val(sample), path(f1r2) @@ -112,7 +110,6 @@ process learnreadorientationmodel_tonly { process mergemut2stats_tonly { label 'process_low' - publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy') input: tuple val(sample), path(stats) @@ -178,7 +175,6 @@ process mutect2_t_tonly { process mutect2filter_tonly { label 'process_mid' - publishDir(path: "${outdir}/vcfs/mutect2_tonly", mode: 'copy') input: tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups),path(tumorcontamination) @@ -441,7 +437,6 @@ process annotvep_tonly { process combinemafs_tonly { label 'process_low' - publishDir(path: "${outdir}/mafs/tumor_only", mode: 'copy') input: path(allmafs) From 9091ad8000b44bd925eb82c4b54a556917626cf7 Mon Sep 17 00:00:00 2001 From: dnousome Date: Wed, 13 Dec 2023 11:48:51 -0500 Subject: [PATCH 16/58] fix: varscan fixes --- conf/modules.config | 1 + modules/local/variant_calling_tonly.nf | 2 +- nextflow.config | 2 +- subworkflows/local/workflows.nf | 4 +-- 
subworkflows/local/workflows_tonly.nf | 35 ++++++++++++++------------ 5 files changed, 24 insertions(+), 20 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 3c8a778..5ac0c2d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -245,6 +245,7 @@ process { path: { "${params.outdir}/mafs" }, mode: 'copy' ] + errorStrategy='ignore' } withName: 'combinemafs_tonly' { diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf index 16e6e51..1c58bc6 100644 --- a/modules/local/variant_calling_tonly.nf +++ b/modules/local/variant_calling_tonly.nf @@ -239,7 +239,7 @@ process varscan_tonly { ''' varscan_opts="--strand-filter 0 --min-var-freq 0.01 --output-vcf 1 --variants 1" - pileup_cmd="samtools mpileup -d 100000 -q 15 -Q 15 -f !{GENOMEREF} !{tumor}" + pileup_cmd="samtools mpileup -d 100000 -q 15 -Q 15 -f !{GENOMEREF} -l !{bed} !{tumor}" varscan_cmd="varscan mpileup2cns <($pileup_cmd) $varscan_opts" eval "$varscan_cmd > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf" diff --git a/nextflow.config b/nextflow.config index c3b84d4..1718100 100644 --- a/nextflow.config +++ b/nextflow.config @@ -28,7 +28,7 @@ params { freec_significance = "${projectDir}/bin/assess_significance.R" freec_plot = "${projectDir}/bin/makeGraph.R" lofreq_convert = "${projectDir}/bin/add_gt_lofreq.sh" - split_regions = "36" //Number of regions to split by + split_regions = "24" //Number of regions to split by vep_cache = "/fdb/VEP/102/cache" diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index d9dab2d..875c317 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -546,8 +546,8 @@ workflow INPUT_BAM { baminputonly=Channel.fromPath(params.bam_input) .map{it-> tuple(it.simpleName,it,file("${it}.bai"))} } - if (bamcheck2.size()>0){ - bai=Channel.from(bamcheck2).map{it -> tuple(it.simpleName,it)}.view() + else if (bamcheck2.size()>0){ + bai=Channel.from(bamcheck2).map{it 
-> tuple(it.simpleName,it)} baminputonly=Channel.fromPath(params.bam_input) .map{it-> tuple(it.simpleName,it)} .join(bai) diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf index ca5eb5f..04c0aaa 100644 --- a/subworkflows/local/workflows_tonly.nf +++ b/subworkflows/local/workflows_tonly.nf @@ -137,8 +137,8 @@ workflow VC_TONLY { bambyinterval=bamwithsample.combine(splitout.flatten()) pileup_paired_tonly(bambyinterval) pileup_paired_tout=pileup_paired_tonly.out.groupTuple() - .map{samplename,pileups-> tuple( samplename, - pileups.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tumor.pileup.table/)[0][1].toInteger() } , + .map{samplename,pileups-> tuple( samplename, + pileups.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tumor.pileup.table/)[0][1].toInteger() } , )} mutect2_t_tonly(bambyinterval) @@ -162,9 +162,9 @@ workflow VC_TONLY { mut2tonly_filter=mut2tonlyout.allmut2tonly - .join(mergemut2stats_tonly.out) - .join(learnreadorientationmodel_tonly.out) - .join(contamination_tumoronly.out) + | join(mergemut2stats_tonly.out) + | join(learnreadorientationmodel_tonly.out) + | join(contamination_tumoronly.out) mutect2_tonly_in=mutect2filter_tonly(mut2tonly_filter) | join(sample_sheet) @@ -173,15 +173,17 @@ workflow VC_TONLY { //VarDict - vardict_in_tonly=vardict_tonly(bambyinterval) | groupTuple()| map{tumor,vcf -> tuple(tumor,vcf,"vardict_tonly")} + vardict_in_tonly=vardict_tonly(bambyinterval) | groupTuple() + | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.vardict.vcf/)[0][1].toInteger()},"vardict_tonly")} | combineVariants_vardict_tonly | join(sample_sheet) | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"vardict_tonly",normvcf,normindex)} annotvep_tonly_vardict(vardict_in_tonly) //VarScan_tonly - varscan_in_tonly=bambyinterval.join(contamination_tumoronly.out) - | varscan_tonly | groupTuple() | map{tumor,vcf-> tuple(tumor,vcf,"varscan")} + 
varscan_in_tonly=bambyinterval.combine(contamination_tumoronly.out,by: 0) + | varscan_tonly | groupTuple() + | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf/)[0][1].toInteger()},"varscan_tonly") | combineVariants_varscan_tonly | join(sample_sheet) | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"varscan_tonly",normvcf,normindex)} @@ -197,8 +199,9 @@ workflow VC_TONLY { annotvep_tonly_octopus(octopus_in_tonly) - mutect2_tonly_in|concat(octopus_in_tonly) - | concat(vardict_in_tonly)|concat(varscan_in_tonly) + mutect2_tonly_in | concat(octopus_in_tonly) + | concat(vardict_in_tonly) | concat(varscan_in_tonly) + | groupTuple() | somaticcombine_tonly | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} | annotvep_tonly_combined @@ -324,20 +327,20 @@ workflow INPUT_TONLY_BAM { main: //Either BAM Input or File sheet input if(params.bam_input){ - bambai = params.bam_input +".bai" + bambai = params.bam_input + ".bai" baionly = bambai.replace(".bam", "") bamcheck1 = file(bambai) bamcheck2 = file(baionly) if (bamcheck1.size()>0){ baminputonly=Channel.fromPath(params.bam_input) - .map{it-> tuple(it.simpleName,it,file("${it}.bai"))} + | map{it-> tuple(it.simpleName,it,file("${it}.bai"))} } - if (bamcheck2.size()>0){ - bai=Channel.from(bamcheck2).map{it -> tuple(it.simpleName,it)}.view() + else if (bamcheck2.size()>0){ + bai=Channel.from(bamcheck2).map{it -> tuple(it.simpleName,it)} baminputonly=Channel.fromPath(params.bam_input) - .map{it-> tuple(it.simpleName,it)} - .join(bai) + | map{it-> tuple(it.simpleName,it)} + | join(bai) } sample_sheet=baminputonly.map{samplename,bam,bai -> tuple ( From d84dc3ec505054415420c58252d219a602766e5d Mon Sep 17 00:00:00 2001 From: dnousome Date: Wed, 13 Dec 2023 11:57:57 -0500 Subject: [PATCH 17/58] feat: add setup --- modules/local/variant_calling.nf | 1 - setup.py | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 setup.py diff --git 
a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index f9fdff2..188a9e9 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ -754,7 +754,6 @@ process somaticcombine { process annotvep_tn { - input: tuple val(tumorsample), val(normalsample), val(vc), path(tumorvcf), path(vcfindex) diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..4a15e68 --- /dev/null +++ b/setup.py @@ -0,0 +1,4 @@ +import setuptools + +if __name__ == "__main__": + setuptools.setup() \ No newline at end of file From daf943a3eaf86265016ad8d4fcf4b36c584f7c3d Mon Sep 17 00:00:00 2001 From: dnousome Date: Wed, 13 Dec 2023 12:14:10 -0500 Subject: [PATCH 18/58] fix: add citation --- CITATION.cff | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 CITATION.cff diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..d1bc133 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,15 @@ +cff-version: 1.2.0 +message: "Please cite LOGAN as below." # TODO set up Zenodo to archive your tool and assign a DOI. Or if TOOL_NAME gets published in a journal, include the citation here. 
+authors: # TODO: author names should match those in pyproject.toml + - family-names: LASTNAME1 + given-names: FIRSTNAME1 + - family-names: Sovacool + given-names: Kelly + orcid: https://orcid.org/0000-0003-3283-829X + - family-names: Koparde + given-names: Vishal + orcid: https://orcid.org/0000-0001-8978-8495 +title: "TOOL_NAME: insert one-line description here" # TODO: citation title should match pyproject.toml +url: https://ccbr.github.io/TOOL_NAME/ +repository-code: https://github.com/CCBR/TOOL_NAME +license: MIT \ No newline at end of file From c1a89590464c9e2e1135cbd71b8ee29ee279f381 Mon Sep 17 00:00:00 2001 From: dnousome Date: Wed, 13 Dec 2023 12:14:23 -0500 Subject: [PATCH 19/58] fix: missing closing --- subworkflows/local/workflows_tonly.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf index 04c0aaa..1d9c2de 100644 --- a/subworkflows/local/workflows_tonly.nf +++ b/subworkflows/local/workflows_tonly.nf @@ -183,7 +183,7 @@ workflow VC_TONLY { //VarScan_tonly varscan_in_tonly=bambyinterval.combine(contamination_tumoronly.out,by: 0) | varscan_tonly | groupTuple() - | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf/)[0][1].toInteger()},"varscan_tonly") + | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf/)[0][1].toInteger()},"varscan_tonly")} | combineVariants_varscan_tonly | join(sample_sheet) | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"varscan_tonly",normvcf,normindex)} From b694a369b7e696c671890483cfc7f84b619060f5 Mon Sep 17 00:00:00 2001 From: dnousome Date: Wed, 13 Dec 2023 16:06:55 -0500 Subject: [PATCH 20/58] fix: varscan alleles --- modules/local/variant_calling_tonly.nf | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf index 1c58bc6..6761229 100644 
--- a/modules/local/variant_calling_tonly.nf +++ b/modules/local/variant_calling_tonly.nf @@ -242,7 +242,10 @@ process varscan_tonly { pileup_cmd="samtools mpileup -d 100000 -q 15 -Q 15 -f !{GENOMEREF} -l !{bed} !{tumor}" varscan_cmd="varscan mpileup2cns <($pileup_cmd) $varscan_opts" - eval "$varscan_cmd > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf" + eval "$varscan_cmd > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf_temp" + + awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf_temp \ + | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf printf "TUMOR\t!{tumorname}\n" > sampname From 5ebee2c3179bc3e37be9ae85236dd3875ceaa582 Mon Sep 17 00:00:00 2001 From: dnousome Date: Wed, 13 Dec 2023 16:56:57 -0500 Subject: [PATCH 21/58] refactor: citation --- CITATION.cff | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index d1bc133..3562005 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,8 +1,8 @@ cff-version: 1.2.0 message: "Please cite LOGAN as below." # TODO set up Zenodo to archive your tool and assign a DOI. Or if TOOL_NAME gets published in a journal, include the citation here. 
authors: # TODO: author names should match those in pyproject.toml - - family-names: LASTNAME1 - given-names: FIRSTNAME1 + - family-names: Nousome + given-names: Darryl - family-names: Sovacool given-names: Kelly orcid: https://orcid.org/0000-0003-3283-829X From 96b2415aaeb78d7f04c4d82f70e35b7c9546236f Mon Sep 17 00:00:00 2001 From: dnousome Date: Wed, 13 Dec 2023 17:06:54 -0500 Subject: [PATCH 22/58] docs: start update for MKdocs --- mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index e6e1e40..91186b8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,4 +1,4 @@ -site_name: CCBR wgs-seek +site_name: CCBR LOGAN site_description: >- CCBR Whole Genome Sequencing Pipeline nav: From 3b9c4e4e698940e716f374af5eb88717c5728192 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Thu, 14 Dec 2023 11:39:52 -0500 Subject: [PATCH 23/58] fix: IUPAC codes --- modules/local/variant_calling.nf | 12 ++++++------ modules/local/variant_calling_tonly.nf | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index 188a9e9..6508d20 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ -250,7 +250,7 @@ process mutect2filter { bcftools sort ${tumor}_vs_${normal}.mut2.final.vcf.gz |\ bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ - awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ sed '/^\$/d' | bcftools view - -Oz -o ${tumor}_vs_${normal}.mut2.norm.vcf.gz bcftools index -t ${tumor}_vs_${normal}.mut2.norm.vcf.gz """ @@ -392,9 +392,9 @@ process varscan_tn { varscan_cmd="varscan somatic <($dual_pileup) !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf $varscan_opts --mpileup 1" eval "$varscan_cmd" - awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' 
!{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel \ + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel \ | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.indel_temp.vcf.gz - awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp \ + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp \ | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.snp_temp.vcf.gz gatk SortVcf -I !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.snp_temp.vcf.gz \ @@ -569,7 +569,7 @@ process combineVariants { -SD $GENOMEDICT \ -I $vcfin bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ - awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ sed '/^\$/d' > ${sample}.${vc}.temp.vcf bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz @@ -617,7 +617,7 @@ process combineVariants_alternative { bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf bcftools sort ${sample}.${vc}.temp.vcf -Oz -o ${sample}.${vc}.marked.vcf.gz bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ - awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ sed '/^\$/d' > ${sample}.${vc}.temp.vcf bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz @@ -692,7 +692,7 @@ process 
combineVariants_strelka { """ bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz -a bcftools norm ${sample}.temp.strelka.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ - awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ sed '/^\$/d' > ${sample}.temp1.strelka.vcf.gz bcftools sort ${sample}.temp1.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf index 6761229..4c4d0a7 100644 --- a/modules/local/variant_calling_tonly.nf +++ b/modules/local/variant_calling_tonly.nf @@ -208,7 +208,7 @@ process mutect2filter_tonly { bcftools sort ${sample}.tonly.mut2.final.vcf.gz |\ bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ - awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\t"; print}}' |\ + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ sed '/^\$/d' |\ bcftools view - -Oz -o ${sample}.tonly.mut2.norm.vcf.gz bcftools index -t ${sample}.tonly.mut2.norm.vcf.gz @@ -244,7 +244,7 @@ process varscan_tonly { eval "$varscan_cmd > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf_temp" - awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf_temp \ + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf_temp \ | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf printf "TUMOR\t!{tumorname}\n" > sampname From 6a540bf1b220d615e91b6481d0e9e141ab44259e Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Thu, 14 Dec 2023 20:52:54 -0500 Subject: [PATCH 24/58] fix: cli settings --- main.nf | 76 ++++++++++++++------------ modules/local/variant_calling.nf | 2 +- modules/local/variant_calling_tonly.nf | 6 +- 
src/__main__.py | 6 +- 4 files changed, 50 insertions(+), 40 deletions(-) diff --git a/main.nf b/main.nf index e1e7422..2988cc5 100644 --- a/main.nf +++ b/main.nf @@ -4,53 +4,60 @@ nextflow.enable.dsl=2 date = new Date().format( 'yyyyMMdd' ) -//SUB WORKFLOWS to SPLIT -PIPE_ALIGN=params.PIPE_ALIGN +log.info """\ + L O G A E E K P I P E L I N E + ============================= + genome: ${params.genome} + outdir: ${params.outdir} + Samplesheet: ${params.sample_sheet} + Samples: ${params.fastq_input} ${params.file_input} ${params.bam_input} + """ + .stripIndent() -PIPE_VC=params.PIPE_VC -PIPE_SV=params.PIPE_SV -PIPE_CNV=params.PIPE_CNV -PIPE_QC_GL=params.PIPE_QC_GL -PIPE_QC_NOGL=params.PIPE_QC_NOGL -PIPE_GL=params.PIPE_GL +include {INPUT; ALIGN; GL; + VC; INPUT_BAM; SV; CNVmouse; CNVhuman; + QC_GL; QC_NOGL} from "./subworkflows/local/workflows.nf" -PIPE_TONLY_ALIGN=params.PIPE_TONLY_ALIGN -PIPE_TONLY_VC=params.PIPE_TONLY_VC -PIPE_TONLY_SV=params.PIPE_TONLY_SV -PIPE_TONLY_CNV=params.PIPE_TONLY_CNV -PIPE_TONLY_QC=params.PIPE_TONLY_QC +include {INPUT_TONLY; INPUT_TONLY_BAM; + ALIGN_TONLY; + VC_TONLY; SV_TONLY; CNVhuman_tonly; CNVmouse_tonly; QC_TONLY } from "./subworkflows/local/workflows_tonly.nf" -PIPE_BAMVC=params.PIPE_BAMVC -PIPE_BAMSV=params.PIPE_BAMCNV -PIPE_BAMCNV=params.PIPE_BAMCNV -PIPE_TONLY_BAMVC=params.PIPE_TONLY_BAMVC -PIPE_TONLY_BAMSV=params.PIPE_TONLY_BAMSV -PIPE_TONLY_BAMCNV=params.PIPE_TONLY_BAMCNV +//SUB WORKFLOWS to SPLIT +PIPE_ALIGN=params.align + +PIPE_VC=params.vc +PIPE_SV=params.sv +PIPE_CNV=params.cnv + +PIPE_QC_GL=params.qc_gl +PIPE_QC_NOGL=params.qc_nogl + +PIPE_GL=params.gl + +PIPE_TONLY_ALIGN=params.align_tumoronly +PIPE_TONLY_VC=params.vc_tumoronly +PIPE_TONLY_SV=params.sv_tumoronly +PIPE_TONLY_CNV=params.cnv_tumoronly +PIPE_TONLY_QC=params.qc_tumoronly + + +PIPE_BAMVC=params.vc_bam +PIPE_BAMSV=params.sv_bam +PIPE_BAMCNV=params.cnv_bam + +PIPE_TONLY_BAMVC=params.vc_bam_tumoronly +PIPE_TONLY_BAMSV=params.sv_bam_tumoronly 
+PIPE_TONLY_BAMCNV=params.cnv_bam_tumoronly -include {INPUT; ALIGN; GL; - VC; INPUT_BAM; SV; CNVmouse; CNVhuman; - QC_GL; QC_NOGL} from "./subworkflows/local/workflows.nf" -include {INPUT_TONLY; INPUT_TONLY_BAM; - ALIGN_TONLY; - VC_TONLY; SV_TONLY; CNVhuman_tonly; CNVmouse_tonly; QC_TONLY } from "./subworkflows/local/workflows_tonly.nf" -log.info """\ - W G S S E E K P I P E L I N E - ============================= - genome: ${params.genome} - outdir: ${params.outdir} - Samplesheet: ${params.sample_sheet} - Samples: ${params.fastq_input} ${params.file_input} ${params.bam_input} - """ - .stripIndent() workflow.onComplete { if (!workflow.stubRun && !workflow.commandLine.contains('-preview')) { @@ -61,7 +68,6 @@ workflow.onComplete { } } -//Final Workflow //Final Workflow workflow { diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index 6508d20..e23cd0e 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ -399,7 +399,7 @@ process varscan_tn { gatk SortVcf -I !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.snp_temp.vcf.gz \ -I !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.indel_temp.vcf.gz \ - -R !{GENOMEREF} -SD !{GENOMEDICT} \ + -R !{GENOMEREF} -SD !{GENOMEDICT} \ -O !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf printf "NORMAL\t!{normalname}\nTUMOR\t!{tumorname}\n" > sampname diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf index 4c4d0a7..9f8bf93 100644 --- a/modules/local/variant_calling_tonly.nf +++ b/modules/local/variant_calling_tonly.nf @@ -1,9 +1,9 @@ GENOMEREF=file(params.genomes[params.genome].genome) GENOMEFAI=file(params.genomes[params.genome].genomefai) GENOMEDICT=file(params.genomes[params.genome].genomedict) -KGPGERMLINE=params.genomes[params.genome].kgp //1000G_phase1.snps.high_confidence.hg38.vcf.gz" -DBSNP=file(params.genomes[params.genome].dbsnp) 
//dbsnp_138.hg38.vcf.gz" -GNOMADGERMLINE=params.genomes[params.genome].gnomad //somatic-hg38-af-only-gnomad.hg38.vcf.gz +KGPGERMLINE=params.genomes[params.genome].kgp +DBSNP=file(params.genomes[params.genome].dbsnp) +GNOMADGERMLINE=params.genomes[params.genome].gnomad PON=file(params.genomes[params.genome].pon) VEPCACHEDIR=file(params.genomes[params.genome].vepcache) VEPSPECIES=params.genomes[params.genome].vepspecies diff --git a/src/__main__.py b/src/__main__.py index 0a70824..1214479 100644 --- a/src/__main__.py +++ b/src/__main__.py @@ -31,7 +31,11 @@ def common_options(func): cls=OrderedCommands, context_settings=dict(help_option_names=["-h", "--help"]) ) @click.version_option(get_version(), "-v", "--version", is_flag=True) -@click.option("--citation", is_flag=True, callback=print_citation, expose_value=False, help="Print the citation in bibtex format and exit.") +#@click.option("--citation", +# is_flag=True, +# callback=print_citation, +# expose_value=False, +# help="Print the citation in bibtex format and exit.") def cli(): """whoLe genOme-sequencinG Analysis pipeliNe From 3301ad0ec837857452a92bd2564d84ecb88ea0e4 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Thu, 14 Dec 2023 22:00:36 -0500 Subject: [PATCH 25/58] fix: update for cli interface --- main.nf | 219 +++++++++++--------------- nextflow.config | 32 ++-- subworkflows/local/workflows_tonly.nf | 4 +- 3 files changed, 106 insertions(+), 149 deletions(-) diff --git a/main.nf b/main.nf index 2988cc5..cb2ca03 100644 --- a/main.nf +++ b/main.nf @@ -5,11 +5,11 @@ date = new Date().format( 'yyyyMMdd' ) log.info """\ - L O G A E E K P I P E L I N E + L O G A N P I P E L I N E ============================= genome: ${params.genome} outdir: ${params.outdir} - Samplesheet: ${params.sample_sheet} + Sample Sheet: ${params.sample_sheet} Samples: ${params.fastq_input} ${params.file_input} ${params.bam_input} """ .stripIndent() @@ -28,37 +28,6 @@ include {INPUT_TONLY; INPUT_TONLY_BAM; //SUB WORKFLOWS to SPLIT 
-PIPE_ALIGN=params.align - -PIPE_VC=params.vc -PIPE_SV=params.sv -PIPE_CNV=params.cnv - -PIPE_QC_GL=params.qc_gl -PIPE_QC_NOGL=params.qc_nogl - -PIPE_GL=params.gl - -PIPE_TONLY_ALIGN=params.align_tumoronly -PIPE_TONLY_VC=params.vc_tumoronly -PIPE_TONLY_SV=params.sv_tumoronly -PIPE_TONLY_CNV=params.cnv_tumoronly -PIPE_TONLY_QC=params.qc_tumoronly - - -PIPE_BAMVC=params.vc_bam -PIPE_BAMSV=params.sv_bam -PIPE_BAMCNV=params.cnv_bam - -PIPE_TONLY_BAMVC=params.vc_bam_tumoronly -PIPE_TONLY_BAMSV=params.sv_bam_tumoronly -PIPE_TONLY_BAMCNV=params.cnv_bam_tumoronly - - - - - - workflow.onComplete { if (!workflow.stubRun && !workflow.commandLine.contains('-preview')) { def message = Utils.spooker(workflow) @@ -70,123 +39,111 @@ workflow.onComplete { //Final Workflow workflow { - - if (PIPE_ALIGN){ + //Inputs + if (params.fastq_input && params.sample_sheet){ INPUT() ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) - } //Germline - if (PIPE_GL){ - INPUT() - ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) - GL(ALIGN.out.bambyinterval) - } - - //Tumor-Normal Pipelines - if (PIPE_VC){ - INPUT() - ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) - VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet) - } - if (PIPE_QC_GL){ - INPUT() - ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) - GL(ALIGN.out.bambyinterval) - QC_GL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout,GL.out.glnexusout,GL.out.bcfout) - } - if (PIPE_QC_NOGL){ - INPUT() - ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) - QC_NOGL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout) - } - if (PIPE_SV){ - INPUT() - ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) - SV(ALIGN.out.bamwithsample) - } - if (PIPE_CNV){ - INPUT() - ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) - if (params.genome == "mm10"){ - CNVmouse(ALIGN.out.bamwithsample) - } else if (params.genome== "hg38"){ - VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet) - 
CNVhuman(ALIGN.out.bamwithsample,VC.out.somaticcall_input) - + if (params.gl){ + GL(ALIGN.out.bambyinterval) } + //Tumor-Normal VC, SV, CNV + if (params.vc){ + VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet) + } + if (params.sv){ + SV(ALIGN.out.bamwithsample) + } + if (params.cnv){ + if (params.genome == "mm10"){ + CNVmouse(ALIGN.out.bamwithsample) + } else if (params.genome== "hg38"){ + if (!params.vc){ + VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet) + CNVhuman(ALIGN.out.bamwithsample,VC.out.somaticcall_input) + } else { + CNVhuman(ALIGN.out.bamwithsample,VC.out.somaticcall_input) + } + } } - if (PIPE_BAMVC){ - INPUT_BAM() - VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet) - } - if (PIPE_BAMSV){ - INPUT_BAM() - SV(INPUT_BAM.out.bamwithsample) - } - if (PIPE_BAMCNV){ + if (params.qc && params.gl){ + QC_GL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout,GL.out.glnexusout,GL.out.bcfout) + } else if (params.qc){ + QC_NOGL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout) + } + + } + + //TUMOR-NOMRAL BAM INPUT + if (params.bam_input && params.sample_sheet){ INPUT_BAM() - if (params.genome == "mm10"){ - CNVmouse(INPUT_BAM.out.bamwithsample) - } else if (params.genome== "hg38"){ + if (params.vc){ VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet) - CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input) - + } + if (params.sv){ + SV(INPUT_BAM.out.bamwithsample) + } + if (params.cnv){ + if (params.genome == "mm10"){ + CNVmouse(INPUT_BAM.out.bamwithsample) + } else if (params.genome== "hg38"){ + if (!params.vc){ + VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet) + CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input) + }else { + CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input) + } + } } } - - + ///Tumor Only Pipelines - if (PIPE_TONLY_ALIGN){ + if (params.fastq_input && 
!params.sample_sheet){ INPUT_TONLY() ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet) - } - if (PIPE_TONLY_VC){ - INPUT_TONLY() - ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet) - VC_TONLY(ALIGN_TONLY.out.bamwithsample,ALIGN_TONLY.out.splitout,ALIGN_TONLY.out.sample_sheet) - } - if (PIPE_TONLY_SV){ - INPUT_TONLY() - ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet) - SV_TONLY(ALIGN_TONLY.out.bamwithsample) - } - if (PIPE_TONLY_CNV){ - INPUT_TONLY() - ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet) - if (params.genome == "mm10"){ - CNVmouse_tonly(ALIGN_TONLY.out.bamwithsample) - } else if (params.genome== "hg38"){ + if (params.vc){ VC_TONLY(ALIGN_TONLY.out.bamwithsample,ALIGN_TONLY.out.splitout,ALIGN_TONLY.out.sample_sheet) - CNVhuman_tonly(ALIGN_TONLY.out.bamwithsample,VC_TONLY.out.somaticcall_input) - } - } - - if (PIPE_TONLY_QC){ - INPUT_TONLY() - ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet) - QC_TONLY(ALIGN_TONLY.out.fastqin,ALIGN_TONLY.out.fastpout,ALIGN_TONLY.out.bqsrout) + if (params.sv){ + SV_TONLY(ALIGN_TONLY.out.bamwithsample) + } + if (params.cnv){ + if (params.genome == "mm10"){ + CNVmouse_tonly(ALIGN_TONLY.out.bamwithsample) + } else if (params.genome== "hg38"){ + if (!params.vc){ + VC_TONLY(ALIGN_TONLY.out.bamwithsample,ALIGN_TONLY.out.splitout,ALIGN_TONLY.out.sample_sheet) + CNVhuman_tonly(ALIGN_TONLY.out.bamwithsample,VC_TONLY.out.somaticcall_input) + } else{ + CNVhuman_tonly(ALIGN_TONLY.out.bamwithsample,VC_TONLY.out.somaticcall_input) + } + } + } + if (params.qc){ + QC_TONLY(ALIGN_TONLY.out.fastqin,ALIGN_TONLY.out.fastpout,ALIGN_TONLY.out.bqsrout) + } + } - } //Variant Calling from BAM-Tumor Only Mode - if (PIPE_TONLY_BAMVC){ + if (params.bam_input && !params.sample_sheet){ INPUT_TONLY_BAM() - VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet) - } - if (PIPE_TONLY_BAMSV){ - INPUT_TONLY_BAM() 
- SV_TONLY(INPUT_TONLY_BAM.out.bamwithsample) - } - if (PIPE_TONLY_BAMCNV){ - INPUT_TONLY_BAM() - if (params.genome == "mm10"){ - CNVmouse_tonly(INPUT_TONLY_BAM.out.bamwithsample) - }else if (params.genome== "hg38"){ + if (params.vc){ VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet) - CNVhuman_tonly(INPUT_TONLY_BAM.out.bamwithsample,VC_TONLY.out.somaticcall_input) - } - } + if (params.sv){ + SV_TONLY(INPUT_TONLY_BAM.out.bamwithsample) + } + if (params.cnv){ + if (params.genome == "mm10"){ + CNVmouse_tonly(INPUT_TONLY_BAM.out.bamwithsample) + } else if (params.genome== "hg38"){ + VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet) + CNVhuman_tonly(INPUT_TONLY_BAM.out.bamwithsample,VC_TONLY.out.somaticcall_input) + } + } + + } + } diff --git a/nextflow.config b/nextflow.config index 1718100..3bc3a3f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -33,22 +33,22 @@ params { vep_cache = "/fdb/VEP/102/cache" //SUB WORKFLOWS to SPLIT - PIPE_ALIGN=null - PIPE_GL=null - PIPE_VC=null - PIPE_SV=null - PIPE_CNV=null - PIPE_QC=null - PIPE_QC_NOGL=null - PIPE_QC_GL=null - PIPE_BAMVC=null - PIPE_BAMCNV=null - PIPE_BAMSV=null - - PIPE_TONLY_ALIGN=null - PIPE_TONLY_VC=null - PIPE_TONLY_SV=null - PIPE_TONLY_CNV=null + align=null + gl=null + vc=null + sv=null + cnv=null + qc=null + qc_nogl=null + qc_gl=null + vc_bam=null + cnv_bam=null + sv_bam=null + + //align_=null + //vc=null + sv_tumoronly=null + cnv_tumoronly=null PIPE_BAMVC_TONLY=null PIPE_TONLY_BAMVC=null diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf index 1d9c2de..57307c4 100644 --- a/subworkflows/local/workflows_tonly.nf +++ b/subworkflows/local/workflows_tonly.nf @@ -89,8 +89,8 @@ workflow ALIGN_TONLY { sample_sheet main: - fastp(fastqinput) - splitinterval(intervalbedin) + fastp(fastqinput) + splitinterval(intervalbedin) bwamem2(fastp.out) 
//indelrealign(bwamem2.out) From 32a23943cbf46e05c9d1a1533421e944e1204131 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Thu, 14 Dec 2023 23:03:01 -0500 Subject: [PATCH 26/58] fix: additional changes for cli --- conf/ci_stub.config | 1 - conf/modules.config | 1 - main.nf | 13 ++++++------- nextflow.config | 20 +------------------- 4 files changed, 7 insertions(+), 28 deletions(-) diff --git a/conf/ci_stub.config b/conf/ci_stub.config index 808f53f..af76ab6 100644 --- a/conf/ci_stub.config +++ b/conf/ci_stub.config @@ -15,7 +15,6 @@ params { process { cpus = 1 memory = '1.GB' - singularity { enabled = false } diff --git a/conf/modules.config b/conf/modules.config index 5ac0c2d..48cf213 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -84,7 +84,6 @@ process { ] } - withName: 'fastqc' { publishDir = [ path: { "${params.outdir}/QC/fastqc" }, diff --git a/main.nf b/main.nf index cb2ca03..01bf4a5 100644 --- a/main.nf +++ b/main.nf @@ -16,8 +16,8 @@ log.info """\ -include {INPUT; ALIGN; GL; - VC; INPUT_BAM; SV; CNVmouse; CNVhuman; +include {INPUT; INPUT_BAM; ALIGN; GL; + VC; SV; CNVmouse; CNVhuman; QC_GL; QC_NOGL} from "./subworkflows/local/workflows.nf" include {INPUT_TONLY; INPUT_TONLY_BAM; @@ -26,7 +26,6 @@ include {INPUT_TONLY; INPUT_TONLY_BAM; - //SUB WORKFLOWS to SPLIT workflow.onComplete { if (!workflow.stubRun && !workflow.commandLine.contains('-preview')) { @@ -40,7 +39,7 @@ workflow.onComplete { //Final Workflow workflow { //Inputs - if (params.fastq_input && params.sample_sheet){ + if ([params.fastq_input,params.file_input].any() && params.sample_sheet){ INPUT() ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) //Germline @@ -75,7 +74,7 @@ workflow { } //TUMOR-NOMRAL BAM INPUT - if (params.bam_input && params.sample_sheet){ + if ([params.bam_input,params.file_input].any() && params.sample_sheet){ INPUT_BAM() if (params.vc){ VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet) @@ -98,7 +97,7 @@ workflow { } 
///Tumor Only Pipelines - if (params.fastq_input && !params.sample_sheet){ + if ([params.fastq_input,params.file_input].any() && !params.sample_sheet){ INPUT_TONLY() ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet) if (params.vc){ @@ -125,7 +124,7 @@ workflow { } //Variant Calling from BAM-Tumor Only Mode - if (params.bam_input && !params.sample_sheet){ + if ([params.bam_input,params.file_input].any() && !params.sample_sheet){ INPUT_TONLY_BAM() if (params.vc){ VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet) diff --git a/nextflow.config b/nextflow.config index 3bc3a3f..45ba2c3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -33,30 +33,12 @@ params { vep_cache = "/fdb/VEP/102/cache" //SUB WORKFLOWS to SPLIT - align=null gl=null vc=null sv=null cnv=null qc=null - qc_nogl=null - qc_gl=null - vc_bam=null - cnv_bam=null - sv_bam=null - - //align_=null - //vc=null - sv_tumoronly=null - cnv_tumoronly=null - - PIPE_BAMVC_TONLY=null - PIPE_TONLY_BAMVC=null - PIPE_TONLY_BAMSV=null - PIPE_TONLY_BAMCNV=null - - PIPE_TONLY_QC=null - + //Set all Inputs to null sample_sheet=null fastq_input=null From 724f0edc916ea54d95858922f2fc919bf7b7b46c Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Fri, 15 Dec 2023 09:51:46 -0500 Subject: [PATCH 27/58] fix: rename slurm --- CITATION.cff | 6 +++--- assets/slurm_header_biowulf.sh | 2 +- assets/slurm_header_frce.sh | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 3562005..e52b1c9 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -9,7 +9,7 @@ authors: # TODO: author names should match those in pyproject.toml - family-names: Koparde given-names: Vishal orcid: https://orcid.org/0000-0001-8978-8495 -title: "TOOL_NAME: insert one-line description here" # TODO: citation title should match pyproject.toml -url: https://ccbr.github.io/TOOL_NAME/ -repository-code: https://github.com/CCBR/TOOL_NAME +title: "LOGAN: 
whoLe genOme-sequencinG Analysis pipeliNe" # TODO: citation title should match pyproject.toml +url: https://ccbr.github.io/LOGAN/ +repository-code: https://github.com/CCBR/LOGAN license: MIT \ No newline at end of file diff --git a/assets/slurm_header_biowulf.sh b/assets/slurm_header_biowulf.sh index 65b61ab..ce79e61 100644 --- a/assets/slurm_header_biowulf.sh +++ b/assets/slurm_header_biowulf.sh @@ -3,7 +3,7 @@ #SBATCH --mem=1g #SBATCH --time=1-00:00:00 #SBATCH --parsable -#SBATCH -J "tool_name" +#SBATCH -J "LOGAN" #SBATCH --mail-type=BEGIN,END,FAIL #SBATCH --output "log/slurm_%j.log" #SBATCH --output "log/slurm_%j.log" diff --git a/assets/slurm_header_frce.sh b/assets/slurm_header_frce.sh index 665274e..957972a 100644 --- a/assets/slurm_header_frce.sh +++ b/assets/slurm_header_frce.sh @@ -3,7 +3,7 @@ #SBATCH --mem=1g #SBATCH --time=1-00:00:00 #SBATCH --parsable -#SBATCH -J "tool_name" +#SBATCH -J "LOGAN" #SBATCH --mail-type=BEGIN,END,FAIL #SBATCH --output "log/slurm_%j.log" #SBATCH --output "log/slurm_%j.log" From 0d9b7c7c4d0e7c0c73ece1fc5fc5e747aab9703c Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Fri, 15 Dec 2023 10:57:58 -0500 Subject: [PATCH 28/58] fix: slurm submission changes --- Docker_hubmodules | 19 ------------------- conf/biowulf.config | 14 ++++++++------ conf/ci_stub.config | 7 ++++--- conf/frce.config | 14 ++++++++------ 4 files changed, 20 insertions(+), 34 deletions(-) delete mode 100644 Docker_hubmodules diff --git a/Docker_hubmodules b/Docker_hubmodules deleted file mode 100644 index 90093ba..0000000 --- a/Docker_hubmodules +++ /dev/null @@ -1,19 +0,0 @@ -## Compile list of Modules to - module=['vcf2maf/1.6.21','VEP/102'] - module=['fastq_screen/0.15.2','bowtie/2-2.5.1'] - module=['kraken/2.1.2', 'kronatools/2.8'] - module=['fastqc/0.11.9'] - module=['qualimap/2.2.1','java/12.0.1'] - module=['samtools/1.16.1'] - module=['vcftools/0.1.16'] - module=['picard/2.20.8'] - module=['bcftools/1.9'] - module=['GATK/4.2.0.0'] - 
module=["snpEff/4.3t"] - module=['multiqc/1.11'] - module=['GATK/3.8-1'] - module=['bwa-mem2/2.2.1','samblaster/0.1.26','samtools/1.15.1'] - module=['fastp/0.23.2'] - - - diff --git a/conf/biowulf.config b/conf/biowulf.config index 78b3f05..584d846 100644 --- a/conf/biowulf.config +++ b/conf/biowulf.config @@ -26,9 +26,11 @@ singularity { env.SINGULARITY_CACHEDIR = "/data/CCBR_Pipeliner/SIFS" -process.clusterOptions = ' --gres=lscratch:200 ' -process.scratch = '/lscratch/$SLURM_JOBID' -process.stageInMode = 'symlink' -process.stageOutMode = 'rsync' -// for running pipeline on group sharing data directory, this can avoid inconsistent files timestamps -process.cache = 'lenient' +process { + clusterOptions = ' --gres=lscratch:200 ' + scratch = '/lscratch/$SLURM_JOBID' + stageInMode = 'symlink' + stageOutMode = 'rsync' + // for running pipeline on group sharing data directory, this can avoid inconsistent files timestamps + cache = 'lenient' +} \ No newline at end of file diff --git a/conf/ci_stub.config b/conf/ci_stub.config index af76ab6..f882c66 100644 --- a/conf/ci_stub.config +++ b/conf/ci_stub.config @@ -15,7 +15,8 @@ params { process { cpus = 1 memory = '1.GB' - singularity { - enabled = false - } + scratch = false + singularity { + enabled = false + } } diff --git a/conf/frce.config b/conf/frce.config index 4f132a8..bd0614c 100644 --- a/conf/frce.config +++ b/conf/frce.config @@ -19,11 +19,13 @@ singularity { envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH' } -process.scratch = null // TODO +process { + scratch = null // TODO -process.stageInMode = 'symlink' -process.stageOutMode = 'rsync' + stageInMode = 'symlink' + stageOutMode = 'rsync' -// for running pipeline on group sharing data directory, this can avoid inconsistent files timestamps -process.cache = 'lenient' -} + // for running pipeline on group sharing data directory, this can avoid inconsistent files timestamps + cache = 'lenient' + +} \ No newline at end of 
file From 699cc09ed04823b3289dae9caffd0ce5db54548e Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Fri, 15 Dec 2023 12:24:58 -0500 Subject: [PATCH 29/58] fix: unpaired mode --- subworkflows/local/workflows.nf | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index 875c317..6dd79f5 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -374,6 +374,11 @@ workflow CNVmouse { //FREEC Paired Mode bamwithsample | freec_paired + //FREEC Unpaired Mode + bamwithsample + | map{tname,tumor,tbai,nname,norm,nbai->tuple(tname,tumor,tbai)} + | freec + } workflow CNVhuman { From 0467eb455f692ead24be56a2d912b315dc50f756 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Fri, 15 Dec 2023 16:50:12 -0500 Subject: [PATCH 30/58] fix: singularity changes --- conf/biowulf.config | 3 ++- conf/interactive.config | 10 ++++++++++ conf/modules.config | 20 ++++++++++++-------- main.nf | 7 +++++-- nextflow.config | 3 +-- 5 files changed, 30 insertions(+), 13 deletions(-) diff --git a/conf/biowulf.config b/conf/biowulf.config index 584d846..a679cf1 100644 --- a/conf/biowulf.config +++ b/conf/biowulf.config @@ -21,7 +21,8 @@ singularity { enabled = true autoMounts = true cacheDir = "/data/CCBR_Pipeliner/SIFS" - envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH' + envWhitelist = 'https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH' + runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/nousomedr/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' } env.SINGULARITY_CACHEDIR = "/data/CCBR_Pipeliner/SIFS" diff --git a/conf/interactive.config b/conf/interactive.config index 3f11c04..725d1ae 100644 --- a/conf/interactive.config +++ b/conf/interactive.config @@ -3,5 +3,15 @@ params { max_memory = '220 GB' max_cpus = 56 max_time = '12 h' + + } process.scratch = false + + 
+singularity { + enabled = true + autoMounts = true + cacheDir = "/data/CCBR_Pipeliner/SIFS" + envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH' +} \ No newline at end of file diff --git a/conf/modules.config b/conf/modules.config index 48cf213..4c3522c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1,14 +1,15 @@ process { - publishDir = [ - path: { task.label ? "${params.outdir}/${task.label.findAll { !it.startsWith('process_') & !it.startsWith('error_') }.join('/')}/${task.process.tokenize(':')[-1].toLowerCase()}" : "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + //publishDir = [ + // path: { task.label ? "${params.outdir}/${task.label.findAll { !it.startsWith('process_') & !it.startsWith('error_') }.join('/')}/${task.process.tokenize(':')[-1].toLowerCase()}" : "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + // mode: params.publish_dir_mode, + // saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + // ] errorStrategy = 'finish' - withName: sequenza { + withName:'sequenza' { + container = 'dnousome/ccbr_logan_base:v0.3.0' publishDir = [ path: { "${params.outdir}/cnv/sequenza" }, mode: 'copy' @@ -16,14 +17,17 @@ process { } - withName: freec_paired { + withName: 'freec_paired' { publishDir = [ path: { "${params.outdir}/cnv/freec" }, mode: 'copy' ] + container = 'dnousome/ccbr_logan_base:v0.3.0' + } - withName: freec { + withName:'freec' { + container = 'dnousome/ccbr_logan_base:v0.3.0' publishDir = [ path: { "${params.outdir}/cnv/freec" }, mode: 'copy' diff --git a/main.nf b/main.nf index 01bf4a5..0717c5a 100644 --- a/main.nf +++ b/main.nf @@ -39,7 +39,10 @@ workflow.onComplete { //Final Workflow workflow { //Inputs - if ([params.fastq_input,params.file_input].any() && params.sample_sheet){ + //if (params.file_input){ + + //} + if ([params.fastq_input,params.file_input].any() && params.sample_shee && !params.bam){ INPUT() ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) //Germline @@ -74,7 +77,7 @@ workflow { } //TUMOR-NOMRAL BAM INPUT - if ([params.bam_input,params.file_input].any() && params.sample_sheet){ + if ([params.bam_input,params.file_input].any() && params.sample_sheet && params.bam){ INPUT_BAM() if (params.vc){ VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet) diff --git a/nextflow.config b/nextflow.config index 45ba2c3..e0f0e16 100644 --- a/nextflow.config +++ b/nextflow.config @@ -7,11 +7,10 @@ manifest { mainScript = "main.nf" } - +includeConfig 'conf/containers.config' includeConfig 'conf/genomes.config' includeConfig 'conf/base.config' includeConfig 'conf/modules.config' -includeConfig 'conf/containers.config' params { From db723ef0e0f7e20fbab732050b1b2a43ed92e428 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Mon, 18 Dec 2023 15:49:21 -0500 Subject: [PATCH 31/58] fix: additional changes to nf template --- conf/ci_stub.config | 2 ++ docker/logan_base/Dockerfile | 7 +++++-- main.nf | 19 
++++++++++--------- nextflow.config | 3 ++- subworkflows/local/workflows.nf | 20 ++++++++++++++++++-- subworkflows/local/workflows_tonly.nf | 8 ++++---- 6 files changed, 41 insertions(+), 18 deletions(-) diff --git a/conf/ci_stub.config b/conf/ci_stub.config index f882c66..aa74e29 100644 --- a/conf/ci_stub.config +++ b/conf/ci_stub.config @@ -20,3 +20,5 @@ process { enabled = false } } + +stubRun = true \ No newline at end of file diff --git a/docker/logan_base/Dockerfile b/docker/logan_base/Dockerfile index 939b44a..844a8b7 100644 --- a/docker/logan_base/Dockerfile +++ b/docker/logan_base/Dockerfile @@ -70,7 +70,7 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get install -y \ apt-transport-https \ software-properties-common -# Install R (4.0) -- and R packages +# Install R (4.2.2) -- and R packages # ggplot2 dplyr plotly htmlwidgets tidyr and a few extras # For more information, check out: https://cran.r-project.org/bin/linux/ubuntu/ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 \ @@ -85,12 +85,15 @@ RUN Rscript -e 'install.packages(c("argparse"), repos="http://cran.r-project.org RUN Rscript -e 'install.packages(c("flexdashboard"), repos="http://cran.r-project.org")' RUN Rscript -e 'BiocManager::install(c("rtracklayer"))' + # Install Sequenza-Utils/3.0.0 and Sequenza # Requires R, Python, SAMtools, tabix (already satisfied) # https://cran.r-project.org/web/packages/sequenza/vignettes/sequenza.html#getting-started +##Install Old version of IOtools for parallel processing RUN pip3 install --upgrade pip \ && pip3 install sequenza-utils \ - && Rscript -e 'remotes::install_github("ShixiangWang/copynumber"); remotes::install_github("cran/sequenza")' + && Rscript -e 'remotes::install_github("ShixiangWang/copynumber"); remotes::install_github("cran/sequenza")' \ + && Rscript -e 'remotes::install_version("iotools",version="0.3-2")' # Install Control-FREEC/v11.6 and additional dependencies # Requires R, samtools, bedtools, 
sambamba (already satisfied) diff --git a/main.nf b/main.nf index 0717c5a..ae754d1 100644 --- a/main.nf +++ b/main.nf @@ -16,7 +16,7 @@ log.info """\ -include {INPUT; INPUT_BAM; ALIGN; GL; +include {DETERMINEBAM; INPUT; INPUT_BAM; ALIGN; GL; VC; SV; CNVmouse; CNVhuman; QC_GL; QC_NOGL} from "./subworkflows/local/workflows.nf" @@ -38,11 +38,9 @@ workflow.onComplete { //Final Workflow workflow { - //Inputs - //if (params.file_input){ - - //} - if ([params.fastq_input,params.file_input].any() && params.sample_shee && !params.bam){ + DETERMINEBAM() + if ([params.fastq_input,params.file_input].any() && params.sample_sheet && !params.BAMINPUT){ + println "Tumor-Normal FASTQ" INPUT() ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) //Germline @@ -77,7 +75,8 @@ workflow { } //TUMOR-NOMRAL BAM INPUT - if ([params.bam_input,params.file_input].any() && params.sample_sheet && params.bam){ + if ([params.bam_input,params.file_input].any() && params.sample_sheet && BAMINPUT){ + println "Tumor-Normal with BAMs" INPUT_BAM() if (params.vc){ VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet) @@ -100,7 +99,8 @@ workflow { } ///Tumor Only Pipelines - if ([params.fastq_input,params.file_input].any() && !params.sample_sheet){ + if ([params.fastq_input,params.file_input].any() && !params.sample_sheet && !params.BAMINPUT){ + println "Tumor-Only FASTQ" INPUT_TONLY() ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet) if (params.vc){ @@ -127,7 +127,8 @@ workflow { } //Variant Calling from BAM-Tumor Only Mode - if ([params.bam_input,params.file_input].any() && !params.sample_sheet){ + if ([params.bam_input,params.file_input].any() && !params.sample_sheet && params.BAMINPUT){ + println "Tumor-Only BAM" INPUT_TONLY_BAM() if (params.vc){ VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet) diff --git a/nextflow.config b/nextflow.config index e0f0e16..70fd5a7 100644 --- a/nextflow.config +++ 
b/nextflow.config @@ -37,11 +37,12 @@ params { sv=null cnv=null qc=null - + bam=null //Set all Inputs to null sample_sheet=null fastq_input=null bam_input=null + BAMINPUT=null file_input=null publish_dir_mode = 'symlink' diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index 6dd79f5..3e27ff2 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -52,6 +52,21 @@ include {splitinterval} from '../../modules/local/splitbed.nf' +workflow DETERMINEBAM { + if(params.bam_input){ + params.BAMINPUT=true + }else if(params.file_input){ + file(params.file_input).text + //.splitCsv(header: false, sep: "\t", strip:true) + // .map{ sample,bam,bai -> + //if (bam[0] =~ /.bam/){ + // params.BAMINPUT= + //} + //} + } + +} + workflow INPUT { if(params.fastq_input){ @@ -550,12 +565,13 @@ workflow INPUT_BAM { if (bamcheck1.size()>0){ baminputonly=Channel.fromPath(params.bam_input) .map{it-> tuple(it.simpleName,it,file("${it}.bai"))} - } - else if (bamcheck2.size()>0){ + }else if (bamcheck2.size()>0){ bai=Channel.from(bamcheck2).map{it -> tuple(it.simpleName,it)} baminputonly=Channel.fromPath(params.bam_input) .map{it-> tuple(it.simpleName,it)} .join(bai) + }else if (bamcheck1.size==0 && bamcheck2.size==0){ + println "Missing BAM Index" } }else if(params.file_input) { diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf index 57307c4..c73803a 100644 --- a/subworkflows/local/workflows_tonly.nf +++ b/subworkflows/local/workflows_tonly.nf @@ -1,6 +1,5 @@ //All Worksflows in One Place // TODO split subworkflows out into one per file - // TODO: this line should be moved to within a subworkflow or the main workflow intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file') @@ -93,7 +92,7 @@ workflow ALIGN_TONLY { splitinterval(intervalbedin) bwamem2(fastp.out) - //indelrealign(bwamem2.out) + //indelrealign(bwamem2.out) Consider indelreaglinement 
using ABRA? bqsrbambyinterval=bwamem2.out.combine(splitinterval.out.flatten()) @@ -335,12 +334,13 @@ workflow INPUT_TONLY_BAM { if (bamcheck1.size()>0){ baminputonly=Channel.fromPath(params.bam_input) | map{it-> tuple(it.simpleName,it,file("${it}.bai"))} - } - else if (bamcheck2.size()>0){ + }else if (bamcheck2.size()>0){ bai=Channel.from(bamcheck2).map{it -> tuple(it.simpleName,it)} baminputonly=Channel.fromPath(params.bam_input) | map{it-> tuple(it.simpleName,it)} | join(bai) + }else if (bamcheck1.size==0 && bamcheck2.size==0 ){ + println "Missing BAM Index" } sample_sheet=baminputonly.map{samplename,bam,bai -> tuple ( From 2c2eae94b63d3596a35b97195716fd206c74343b Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 19 Dec 2023 12:55:33 -0500 Subject: [PATCH 32/58] fix: cnv freec changes --- ..._paired.pl => make_freec_genome_paired.pl} | 11 ++-- conf/modules.config | 4 +- modules/local/copynumber.nf | 61 +++++++++++++------ nextflow.config | 2 +- 4 files changed, 53 insertions(+), 25 deletions(-) rename bin/{freec_paired.pl => make_freec_genome_paired.pl} (95%) diff --git a/bin/freec_paired.pl b/bin/make_freec_genome_paired.pl similarity index 95% rename from bin/freec_paired.pl rename to bin/make_freec_genome_paired.pl index 161e24e..474dfaf 100644 --- a/bin/freec_paired.pl +++ b/bin/make_freec_genome_paired.pl @@ -26,18 +26,19 @@ print C "chrFiles = $chrFiles\n"; print C "minimalSubclonePresence = 20\nmaxThreads = 8\n"; print C "outputDir = $ARGV[0]\n\n"; - + print C '[sample]' . "\n\n"; - + print C "mateFile = $tumormateFile\n"; print C "inputFormat = BAM\nmateOrientation = FR\n\n"; -print C '[BAF]' . "\n\n"; +print C '[control]' . "\n\n"; print C "mateFile = $controlmateFile\n"; print C "inputFormat = BAM\nmateOrientation = FR\n\n"; - + +print C '[BAF]' . 
"\n\n"; print C "makePileup = $makePileup\n"; print C "fastaFile = $fastaFile\n"; print C "minimalCoveragePerPosition = 20\nminimalQualityPerPosition = 20\n"; -print C "SNPfile = $SNPfile"; +print C "SNPfile = $SNPfile"; \ No newline at end of file diff --git a/conf/modules.config b/conf/modules.config index 4c3522c..185e4ef 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -19,7 +19,7 @@ process { withName: 'freec_paired' { publishDir = [ - path: { "${params.outdir}/cnv/freec" }, + path: { "${params.outdir}/cnv/freec_paired" }, mode: 'copy' ] container = 'dnousome/ccbr_logan_base:v0.3.0' @@ -29,7 +29,7 @@ process { withName:'freec' { container = 'dnousome/ccbr_logan_base:v0.3.0' publishDir = [ - path: { "${params.outdir}/cnv/freec" }, + path: { "${params.outdir}/cnv/freec_unpaired" }, mode: 'copy' ] } diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf index 757465a..fb1c9eb 100644 --- a/modules/local/copynumber.nf +++ b/modules/local/copynumber.nf @@ -123,11 +123,22 @@ process sequenza { process freec_paired { label 'process_highcpu' + publishDir("${outdir}/cnv/freec_paired", mode: 'copy') input: - tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai) + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai) - shell: """ + output: + tuple val(tumorname), val(normalname), + path("${tumorname}_vs_${normalname}.bam_CNVs.p.value.txt"), + path("${tumorname}_vs_${normalname}.bam_ratio.txt"), + path("${tumorname}_vs_${normalname}.bam_BAF.txt"), + path("${tumorname}_vs_${normalname}.bam_ratio.txt.log2.png"), + path("${tumorname}_vs_${normalname}.bam_ratio.txt.png") + + shell: + """ perl $FREECPAIR_SCRIPT \ . 
\ @@ -150,15 +161,21 @@ process freec_paired { cat $FREECPLOT | \ R --slave \ --args 2 \ - ${tumorname}_vs_${normalname}.bam_ratio.txt \ - ${tumorname}_vs_${normalname}.bam_BAF.txt + ${tumorname}.bam_ratio.txt \ + ${tumorname}.bam_BAF.txt - """ + mv ${tumorname}.bam_CNVs.p.value.txt ${tumorname}_vs_${normalname}.bam_CNVs.p.value.txt + mv ${tumorname}.bam_ratio.txt ${tumorname}_vs_${normalname}.bam_ratio.txt + mv ${tumorname}.bam_BAF.txt ${tumorname}_vs_${normalname}.bam_BAF.txt + mv ${tumorname}.bam_ratio.txt.log2.png ${tumorname}_vs_${normalname}.bam_ratio.txt.log2.png + mv ${tumorname}.bam_ratio.txt.png ${tumorname}_vs_${normalname}.bam_ratio.txt.png + + """ stub: """ - touch ${tumorname}_vs_${normalname}.bam_CNVs.p.value.txt - touch ${tumorname}_vs_${normalname}.bam_ratio.txt + touch ${tumorname}_vs_${normalname}.bam_CNVs.p.value.txt + touch ${tumorname}_vs_${normalname}.bam_ratio.txt touch ${tumorname}_vs_${normalname}.bam_BAF.txt touch ${tumorname}_vs_${normalname}.bam_ratio.txt.log2.png touch ${tumorname}_vs_${normalname}.bam_ratio.txt.png @@ -169,10 +186,20 @@ process freec_paired { process freec { label 'process_mid' + publishDir("${outdir}/cnv/freec_unpaired", mode: 'copy') input: tuple val(tumorname), path(tumor), path(tumorbai) + output: + tuple val(tumorname), + path("${tumorname}.bam_CNVs.p.value.txt"), + path("${tumorname}.bam_ratio.txt"), + path("${tumorname}.bam_BAF.txt"), + path("${tumorname}.bam_ratio.txt.log2.png"), + path("${tumorname}.bam_ratio.txt.png") + + shell: """ perl $FREECSCRIPT \ @@ -189,24 +216,24 @@ process freec { cat $FREECSIGNIFICANCE | \ R --slave \ - --args ${tumor}_CNVs \ - ${tumor}_ratio.txt + --args ${tumorname}_CNVs \ + ${tumorname}_ratio.txt cat $FREECPLOT | \ R --slave \ --args 2 \ - ${tumor}_ratio.txt \ - ${tumor}_BAF.txt + ${tumorname}_ratio.txt \ + ${tumorname}_BAF.txt - """ + """ stub: """ - touch ${tumor}_CNVs.p.value.txt - touch ${tumor}_ratio.txt - touch ${tumor}_BAF.txt - touch ${tumor}_ratio.txt.log2.png - touch 
${tumor}_ratio.txt.png + touch ${tumorname}_CNVs.p.value.txt + touch ${tumorname}_ratio.txt + touch ${tumorname}_BAF.txt + touch ${tumorname}_ratio.txt.log2.png + touch ${tumorname}_ratio.txt.png """ } diff --git a/nextflow.config b/nextflow.config index 70fd5a7..9dabc54 100644 --- a/nextflow.config +++ b/nextflow.config @@ -23,7 +23,7 @@ params { script_ancestry = "${projectDir}/bin/sampleCompareAncestoryPlots.R" script_sequenza = "${projectDir}/bin/run_sequenza.R" script_freec = "${projectDir}/bin/make_freec_genome.pl" - script_freecpaired = "${projectDir}/bin/freec_paired.pl" + script_freecpaired = "${projectDir}/bin/make_freec_genome_paired.pl" freec_significance = "${projectDir}/bin/assess_significance.R" freec_plot = "${projectDir}/bin/makeGraph.R" lofreq_convert = "${projectDir}/bin/add_gt_lofreq.sh" From bc6d0362ed2f5a9b5131284baadd5f61c3ba6dcc Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 19 Dec 2023 13:55:06 -0500 Subject: [PATCH 33/58] fix: simplify file inputs --- main.nf | 11 +++++------ subworkflows/local/workflows.nf | 8 ++++---- subworkflows/local/workflows_tonly.nf | 8 ++++---- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/main.nf b/main.nf index ae754d1..2c8367b 100644 --- a/main.nf +++ b/main.nf @@ -10,7 +10,7 @@ log.info """\ genome: ${params.genome} outdir: ${params.outdir} Sample Sheet: ${params.sample_sheet} - Samples: ${params.fastq_input} ${params.file_input} ${params.bam_input} + Samples: ${params.fastq_input} ${params.fastq_file_input} ${params.bam_input} ${params.bam_file_input} """ .stripIndent() @@ -38,8 +38,7 @@ workflow.onComplete { //Final Workflow workflow { - DETERMINEBAM() - if ([params.fastq_input,params.file_input].any() && params.sample_sheet && !params.BAMINPUT){ + if ([params.fastq_input,params.fastq_file_input].any() && params.sample_sheet){ println "Tumor-Normal FASTQ" INPUT() ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) @@ -75,7 +74,7 @@ workflow { } //TUMOR-NOMRAL BAM INPUT - if 
([params.bam_input,params.file_input].any() && params.sample_sheet && BAMINPUT){ + if ([params.bam_input,params.bam_file_input].any() && params.sample_sheet){ println "Tumor-Normal with BAMs" INPUT_BAM() if (params.vc){ @@ -99,7 +98,7 @@ workflow { } ///Tumor Only Pipelines - if ([params.fastq_input,params.file_input].any() && !params.sample_sheet && !params.BAMINPUT){ + if ([params.fastq_input,params.file_input].any() && !params.sample_sheet){ println "Tumor-Only FASTQ" INPUT_TONLY() ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet) @@ -127,7 +126,7 @@ workflow { } //Variant Calling from BAM-Tumor Only Mode - if ([params.bam_input,params.file_input].any() && !params.sample_sheet && params.BAMINPUT){ + if ([params.bam_input,params.bam_file_input].any() && !params.sample_sheet){ println "Tumor-Only BAM" INPUT_TONLY_BAM() if (params.vc){ diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index 3e27ff2..766a41a 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -71,8 +71,8 @@ workflow INPUT { if(params.fastq_input){ fastqinput=Channel.fromFilePairs(params.fastq_input) - }else if(params.file_input) { - fastqinput=Channel.fromPath(params.file_input) + }else if(params.fastq_file_input) { + fastqinput=Channel.fromPath(params.fastq_file_input) .splitCsv(header: false, sep: "\t", strip:true) .map{ sample,fq1,fq2 -> tuple(sample, tuple(file(fq1),file(fq2))) @@ -574,8 +574,8 @@ workflow INPUT_BAM { println "Missing BAM Index" } - }else if(params.file_input) { - baminputonly=Channel.fromPath(params.file_input) + }else if(params.bam_file_input) { + baminputonly=Channel.fromPath(params.bam_file_input) .splitCsv(header: false, sep: "\t", strip:true) .map{ sample,bam,bai -> tuple(sample, file(bam),file(bai)) diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf index c73803a..9f19e1a 100644 --- a/subworkflows/local/workflows_tonly.nf +++ 
b/subworkflows/local/workflows_tonly.nf @@ -51,8 +51,8 @@ workflow INPUT_TONLY { if(params.fastq_input){ fastqinput=Channel.fromFilePairs(params.fastq_input) - }else if(params.file_input) { - fastqinput=Channel.fromPath(params.file_input) + }else if(params.fastq_file_input) { + fastqinput=Channel.fromPath(params.fastq_file_input) .splitCsv(header: false, sep: "\t", strip:true) .map{ sample,fq1,fq2 -> tuple(sample, tuple(file(fq1),file(fq2))) @@ -346,8 +346,8 @@ workflow INPUT_TONLY_BAM { sample_sheet=baminputonly.map{samplename,bam,bai -> tuple ( samplename)} - }else if(params.file_input) { - baminputonly=Channel.fromPath(params.file_input) + }else if(params.bam_file_input) { + baminputonly=Channel.fromPath(params.bam_file_input) .splitCsv(header: false, sep: "\t", strip:true) .map{ sample,bam,bai -> tuple(sample, file(bam),file(bai)) From 7484751f6504228d09dabe95950a5349d550f4bd Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 19 Dec 2023 13:58:20 -0500 Subject: [PATCH 34/58] feat: add file input lists --- nextflow.config | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 9dabc54..9d4783f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -40,10 +40,15 @@ params { bam=null //Set all Inputs to null sample_sheet=null + + fastq_file_input=null + bam_file_input=null + file_input=null + fastq_input=null bam_input=null + BAMINPUT=null - file_input=null publish_dir_mode = 'symlink' outdir = 'results' From af3f7eab013d4e09cde4dfeb2eaf3c8acb8269f4 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 20 Dec 2023 11:05:59 -0500 Subject: [PATCH 35/58] fix: rename cnv output --- modules/local/copynumber.nf | 63 ++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf index fb1c9eb..be691a9 100644 --- a/modules/local/copynumber.nf +++ b/modules/local/copynumber.nf @@ -121,6 +121,7 @@ process sequenza { 
} + process freec_paired { label 'process_highcpu' publishDir("${outdir}/cnv/freec_paired", mode: 'copy') @@ -131,11 +132,11 @@ process freec_paired { output: tuple val(tumorname), val(normalname), - path("${tumorname}_vs_${normalname}.bam_CNVs.p.value.txt"), - path("${tumorname}_vs_${normalname}.bam_ratio.txt"), - path("${tumorname}_vs_${normalname}.bam_BAF.txt"), - path("${tumorname}_vs_${normalname}.bam_ratio.txt.log2.png"), - path("${tumorname}_vs_${normalname}.bam_ratio.txt.png") + path("${tumorname}_vs_${normalname}_CNVs.p.value.txt"), + path("${tumorname}_vs_${normalname}_ratio.txt"), + path("${tumorname}_vs_${normalname}_BAF.txt"), + path("${tumorname}_vs_${normalname}_ratio.txt.log2.png"), + path("${tumorname}_vs_${normalname}_ratio.txt.png") shell: """ @@ -161,24 +162,26 @@ process freec_paired { cat $FREECPLOT | \ R --slave \ --args 2 \ - ${tumorname}.bam_ratio.txt \ - ${tumorname}.bam_BAF.txt + ${tumor}_ratio.txt \ + ${tumor}_BAF.txt - mv ${tumorname}.bam_CNVs.p.value.txt ${tumorname}_vs_${normalname}.bam_CNVs.p.value.txt - mv ${tumorname}.bam_ratio.txt ${tumorname}_vs_${normalname}.bam_ratio.txt - mv ${tumorname}.bam_BAF.txt ${tumorname}_vs_${normalname}.bam_BAF.txt - mv ${tumorname}.bam_ratio.txt.log2.png ${tumorname}_vs_${normalname}.bam_ratio.txt.log2.png - mv ${tumorname}.bam_ratio.txt.png ${tumorname}_vs_${normalname}.bam_ratio.txt.png + mv ${tumor}_CNVs.p.value.txt ${tumorname}_vs_${normalname}_CNVs.p.value.txt + mv ${tumor}_ratio.txt ${tumorname}_vs_${normalname}_ratio.txt + mv ${tumor}_BAF.txt ${tumorname}_vs_${normalname}_BAF.txt + mv ${tumor}_BAF.txt.png ${tumorname}_vs_${normalname}_BAF.txt.png + mv ${tumor}_ratio.txt.log2.png ${tumorname}_vs_${normalname}_ratio.txt.log2.png + mv ${tumor}_ratio.txt.png ${tumorname}_vs_${normalname}_ratio.txt.png """ stub: """ - touch ${tumorname}_vs_${normalname}.bam_CNVs.p.value.txt - touch ${tumorname}_vs_${normalname}.bam_ratio.txt - touch ${tumorname}_vs_${normalname}.bam_BAF.txt - touch 
${tumorname}_vs_${normalname}.bam_ratio.txt.log2.png - touch ${tumorname}_vs_${normalname}.bam_ratio.txt.png + touch ${tumorname}_vs_${normalname}_CNVs.p.value.txt + touch ${tumorname}_vs_${normalname}_ratio.txt + touch ${tumorname}_vs_${normalname}_BAF.txt + touch ${tumorname}_vs_${normalname}_BAF.txt.png + touch ${tumorname}_vs_${normalname}_ratio.txt.log2.png + touch ${tumorname}_vs_${normalname}_ratio.txt.png """ } @@ -193,11 +196,11 @@ process freec { output: tuple val(tumorname), - path("${tumorname}.bam_CNVs.p.value.txt"), - path("${tumorname}.bam_ratio.txt"), - path("${tumorname}.bam_BAF.txt"), - path("${tumorname}.bam_ratio.txt.log2.png"), - path("${tumorname}.bam_ratio.txt.png") + path("${tumorname}_CNVs.p.value.txt"), + path("${tumorname}_ratio.txt"), + path("${tumorname}_BAF.txt"), + path("${tumorname}_ratio.txt.log2.png"), + path("${tumorname}_ratio.txt.png") shell: """ @@ -216,14 +219,21 @@ process freec { cat $FREECSIGNIFICANCE | \ R --slave \ - --args ${tumorname}_CNVs \ - ${tumorname}_ratio.txt + --args ${tumor}_CNVs \ + ${tumor}_ratio.txt cat $FREECPLOT | \ R --slave \ --args 2 \ - ${tumorname}_ratio.txt \ - ${tumorname}_BAF.txt + ${tumor}_ratio.txt \ + ${tumor}_BAF.txt + + mv ${tumor}_CNVs.p.value.txt ${tumorname}_CNVs.p.value.txt + mv ${tumor}_ratio.txt ${tumorname}_ratio.txt + mv ${tumor}_BAF.txt ${tumorname}_BAF.txt + mv ${tumor}_BAF.txt.png ${tumorname}_BAF.txt.png + mv ${tumor}_ratio.txt.log2.png ${tumorname}_ratio.txt.log2.png + mv ${tumor}_ratio.txt.png ${tumorname}_ratio.txt.png """ @@ -232,6 +242,7 @@ process freec { touch ${tumorname}_CNVs.p.value.txt touch ${tumorname}_ratio.txt touch ${tumorname}_BAF.txt + touch ${tumorname}_BAF.txt.png touch ${tumorname}_ratio.txt.log2.png touch ${tumorname}_ratio.txt.png From 90a05520159a79c54f97e0bdaee31f4d9fdeb87e Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 9 Jan 2024 16:43:07 -0500 Subject: [PATCH 36/58] fix: resources location --- conf/genomes.config | 17 +++++++---------- 
nextflow.config | 32 ++++++++++++++++---------------- 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/conf/genomes.config b/conf/genomes.config index 2ee6cdc..3d0843a 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -1,11 +1,11 @@ params { genomes { 'hg38' { - genome = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta" - genomefai = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta.fai" + genome = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta" + genomefai = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta.fai" bwagenome= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.fasta" genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict" - wgsregion = "/data/nousomedr/annotation/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" + wgsregion = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" intervals= "${projectDir}/assets/hg38_v0_wgs_calling_regions.hg38.bed" //millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //shapeitindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) // @@ -14,7 +14,7 @@ params { dbsnp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz" dbsnp_indel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_indel.vcf" 
gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' // /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz - pon = "/data/nousomedr/wgs/updatedpon.vcf.gz" //pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon} + pon = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PON/updatedpon.vcf.gz" //pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon} kgp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/1000G_phase1.snps.high_confidence.hg38.vcf.gz" KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2" snpeff_genome = "GRCh38.86" @@ -29,11 +29,11 @@ params { octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM'] - } + } 'mm10' { - genome = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa" // file(params.genome) - genomefai = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa.fai" // file(params.genome) + genome = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa" // file(params.genome) + genomefai = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa.fai" // file(params.genome) bwagenome= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwaindex/genome.fa" genomedict= 
"/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.dict" intervals="/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/mm10_wgsregions.bed" @@ -66,6 +66,3 @@ params { } } } - - - diff --git a/nextflow.config b/nextflow.config index 9d4783f..3a3392d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,7 +13,7 @@ includeConfig 'conf/base.config' includeConfig 'conf/modules.config' -params { +params { fastq_screen_conf = "${projectDir}/conf/fastq_screen.conf" get_flowcell_lanes = "${projectDir}/bin/scripts/flowcell_lane.py" @@ -27,7 +27,7 @@ params { freec_significance = "${projectDir}/bin/assess_significance.R" freec_plot = "${projectDir}/bin/makeGraph.R" lofreq_convert = "${projectDir}/bin/add_gt_lofreq.sh" - split_regions = "24" //Number of regions to split by + split_regions = "24" //Number of regions to split by vep_cache = "/fdb/VEP/102/cache" @@ -40,7 +40,7 @@ params { bam=null //Set all Inputs to null sample_sheet=null - + fastq_file_input=null bam_file_input=null file_input=null @@ -72,25 +72,25 @@ profiles { autoMounts = true cacheDir = "$PWD/singularity" envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID' - runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/nousomedr/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' + runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' } - biowulf { - includeConfig 'conf/biowulf.config' + biowulf { + includeConfig 'conf/biowulf.config' } - frce { - includeConfig 'conf/frce.config' + frce { + includeConfig 'conf/frce.config' } - interactive { - includeConfig 'conf/interactive.config' + interactive { + includeConfig 'conf/interactive.config' } - slurm { - includeConfig 'conf/slurm.config' + slurm { + includeConfig 'conf/slurm.config' } - ci_stub { - includeConfig 
'conf/ci_stub.config' + ci_stub { + includeConfig 'conf/ci_stub.config' } } - + // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -104,7 +104,7 @@ profiles { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] - + From 4206f0ce1537fed9748a4fb0ff4a056835795465 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 9 Jan 2024 16:46:19 -0500 Subject: [PATCH 37/58] fix: simplify bind paths --- conf/biowulf.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/biowulf.config b/conf/biowulf.config index a679cf1..77a06d5 100644 --- a/conf/biowulf.config +++ b/conf/biowulf.config @@ -22,7 +22,7 @@ singularity { autoMounts = true cacheDir = "/data/CCBR_Pipeliner/SIFS" envWhitelist = 'https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH' - runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/nousomedr/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' + runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' } env.SINGULARITY_CACHEDIR = "/data/CCBR_Pipeliner/SIFS" @@ -34,4 +34,4 @@ process { stageOutMode = 'rsync' // for running pipeline on group sharing data directory, this can avoid inconsistent files timestamps cache = 'lenient' -} \ No newline at end of file +} From 9654866777794fcbc1c3afa1032b59a8a2f286af Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 9 Jan 2024 18:00:05 -0500 Subject: [PATCH 38/58] fix: edit all container paths --- conf/containers.config | 5 +- 
conf/interactive.config | 6 +- conf/modules.config | 10 +- modules/local/copynumber.nf | 50 +++-- modules/local/germline.nf | 53 +++--- modules/local/qc.nf | 190 +++++++++---------- modules/local/trim_align.nf | 62 ++++--- modules/local/variant_calling.nf | 243 +++++++++++++------------ modules/local/variant_calling_tonly.nf | 111 ++++++----- nextflow.config | 3 +- 10 files changed, 396 insertions(+), 337 deletions(-) diff --git a/conf/containers.config b/conf/containers.config index 504d3b8..2ceaf3c 100644 --- a/conf/containers.config +++ b/conf/containers.config @@ -2,6 +2,9 @@ params { containers { base = 'nciccbr/ccbr_ubuntu_base_20.04:v6.1' - logan = 'docker://dnousome/ccbr_logan_base:v0.3.0' + logan = 'docker://dnousome/ccbr_logan_base:v0.3.3' + vcf2maf = 'docker://dnousome/ccbr_vcf2maf:v102.0.0' + octopus = 'docker://dancooke/octopus:latest' + } } diff --git a/conf/interactive.config b/conf/interactive.config index 725d1ae..9808cb5 100644 --- a/conf/interactive.config +++ b/conf/interactive.config @@ -4,7 +4,7 @@ params { max_cpus = 56 max_time = '12 h' - + } process.scratch = false @@ -14,4 +14,6 @@ singularity { autoMounts = true cacheDir = "/data/CCBR_Pipeliner/SIFS" envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH' -} \ No newline at end of file +} + +env.SINGULARITY_CACHEDIR = "/data/CCBR_Pipeliner/SIFS" diff --git a/conf/modules.config b/conf/modules.config index 185e4ef..a5ff333 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -9,7 +9,7 @@ process { errorStrategy = 'finish' withName:'sequenza' { - container = 'dnousome/ccbr_logan_base:v0.3.0' + container = 'dnousome/ccbr_logan_base:v0.3.3' publishDir = [ path: { "${params.outdir}/cnv/sequenza" }, mode: 'copy' @@ -22,12 +22,12 @@ process { path: { "${params.outdir}/cnv/freec_paired" }, mode: 'copy' ] - container = 'dnousome/ccbr_logan_base:v0.3.0' + container = 'dnousome/ccbr_logan_base:v0.3.3' } withName:'freec' { - container = 
'dnousome/ccbr_logan_base:v0.3.0' + container = 'dnousome/ccbr_logan_base:v0.3.3' publishDir = [ path: { "${params.outdir}/cnv/freec_unpaired" }, mode: 'copy' @@ -192,7 +192,7 @@ process { mode: 'copy' ] } - + withName: 'annotsv_tn' { publishDir = [ path: { "${params.outdir}/SV/annotated" }, @@ -291,5 +291,5 @@ process { path: { "${params.outdir}/vcfs/combined" }, mode: 'copy' ] - } + } } diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf index be691a9..e5e0691 100644 --- a/modules/local/copynumber.nf +++ b/modules/local/copynumber.nf @@ -21,12 +21,13 @@ ENSEMBLCACHE='/data/SCLC-BRAINMETS/cn/common/ensembl_data' DRIVERS='/data/SCLC-BRAINMETS/cn/common/DriverGenePanel.38.tsv' HOTSPOTS='/data/SCLC-BRAINMETS/cn/variants/KnownHotspots.somatic.38.vcf.gz' -//DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) +//DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) //ascatR= -//mm10 Paired-Sequenza, FREEC-tumor only +//mm10 Paired-Sequenza, FREEC-tumor only process seqz_sequenza_bychr { + container = "${params.containers.logan}" label 'process_low' input: @@ -57,13 +58,15 @@ process seqz_sequenza_bychr { process sequenza { + container = "${params.containers.logan}" + label 'process_highcpu' input: tuple val(pairid), path(seqz) output: - tuple val(pairid), + tuple val(pairid), path("${pairid}_alternative_solutions.txt"), path("${pairid}_alternative_fit.pdf"), path("${pairid}_model_fit.pdf"), @@ -83,9 +86,9 @@ process sequenza { //samtools mpileup ${normal} -f $GENOMEREF -Q 20 |gzip > ${normalname}.mpileup.gz //sequenza-utils seqz_binning --seqz --window 50 -o ${sample}_bin50.seqz.gz - shell: + shell: ''' - + zcat !{seqz} | awk '{if (NR==1) {print $0} else {if ($1!="chromosome"){print $0}}}' |\ sequenza-utils seqz_binning \ -w 100 \ @@ -99,11 +102,11 @@ process sequenza { ''' - stub: - + stub: + """ - touch "${pairid}_alternative_solutions.txt" - touch "${pairid}_alternative_fit.pdf" + touch "${pairid}_alternative_solutions.txt" + touch 
"${pairid}_alternative_fit.pdf" touch "${pairid}_model_fit.pdf" touch "${pairid}_confints_CP.txt" touch "${pairid}_CN_bars.pdf" @@ -123,6 +126,8 @@ process sequenza { process freec_paired { + container = "${params.containers.logan}" + label 'process_highcpu' publishDir("${outdir}/cnv/freec_paired", mode: 'copy') @@ -188,6 +193,8 @@ process freec_paired { process freec { + container = "${params.containers.logan}" + label 'process_mid' publishDir("${outdir}/cnv/freec_unpaired", mode: 'copy') @@ -251,11 +258,13 @@ process freec { process amber_tonly { + container = "${params.containers.logan}" + label 'process_mid' input: tuple val(tumorname), path(tumor), path(tumorbai) - + output: tuple val(tumorname), path("${tumorname}_amber") @@ -281,13 +290,15 @@ process amber_tonly { """ mkdir ${tumorname}_amber - touch ${tumorname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_amber/${tumorname}.amber.qc + touch ${tumorname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_amber/${tumorname}.amber.qc """ } process amber_tn { + container = "${params.containers.logan}" + label 'process_mid' - + input: tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai) @@ -317,11 +328,13 @@ process amber_tn { """ mkdir ${tumorname}_vs_${normalname}_amber - touch ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.qc + touch ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.qc """ } process cobalt_tonly { + container = "${params.containers.logan}" + label "process_mid" input: @@ -329,7 +342,7 @@ process cobalt_tonly { output: tuple val(tumorname), path("${tumorname}_cobalt") - 
//path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"), + //path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"), //path("${samplename}/${samplename}.cobalt.ratio.pcf"), //path("${samplename}/${samplename}.cobalt.gc.median.tsv") @@ -355,6 +368,8 @@ process cobalt_tonly { } process cobalt_tn { + container = "${params.containers.logan}" + label "process_mid" input: @@ -363,7 +378,7 @@ process cobalt_tn { output: tuple val(tumorname), path("${tumorname}_vs_${normalname}_cobalt") - //path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"), + //path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"), //path("${samplename}/${samplename}.cobalt.ratio.pcf"), //path("${samplename}/${samplename}.cobalt.gc.median.tsv") @@ -391,12 +406,14 @@ process cobalt_tn { process purple { + container = "${params.containers.logan}" + label 'process_mid' publishDir("${outdir}/cnv/purple", mode: 'copy') input: tuple val(tumorname), - path(cobaltin), + path(cobaltin), path(amberin), path(somaticvcf), path(somaticvcfindex) @@ -474,4 +491,3 @@ process ascat_tn { } */ - diff --git a/modules/local/germline.nf b/modules/local/germline.nf index c106683..285a0f3 100644 --- a/modules/local/germline.nf +++ b/modules/local/germline.nf @@ -5,15 +5,15 @@ MODEL="/opt/models/wgs/model.ckpt" //Processes //Deep Variant process deepvariant_step1 { - + input: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path(bed) - + output: - tuple val(samplename), path("outputshard/${samplename}.tfrecord_${bed}.gz"), + tuple val(samplename), path("outputshard/${samplename}.tfrecord_${bed}.gz"), path("gvcf/${samplename}.gvcf.tfrecord_${bed}.gz") - script: + script: """ mkdir -p outputshard mkdir -p gvcf @@ -24,7 +24,7 @@ process deepvariant_step1 { --reads ${samplename}.bam \ --channels insert_size \ --examples outputshard/${samplename}.tfrecord_${bed}.gz \ - --gvcf gvcf/${samplename}.gvcf.tfrecord_${bed}.gz + --gvcf gvcf/${samplename}.gvcf.tfrecord_${bed}.gz """ stub: @@ -32,23 +32,22 
@@ process deepvariant_step1 { mkdir -p outputshard mkdir -p gvcf touch outputshard/${samplename}.tfrecord_${bed}.gz - touch gvcf/${samplename}.gvcf.tfrecord_${bed}.gz + touch gvcf/${samplename}.gvcf.tfrecord_${bed}.gz """ } //Step 2 requires GPU process deepvariant_step2 { - - + input: tuple val(samplename), path(tfrecords), path(tfgvcf) - + output: - tuple val(samplename), path(tfrecords), + tuple val(samplename), path(tfrecords), path("${samplename}_call_variants_output.tfrecord.gz"), path(tfgvcf) - script: + script: """ call_variants \ @@ -69,17 +68,16 @@ process deepvariant_step2 { //Step 3 DV process deepvariant_step3 { - input: tuple val(samplename), path(tfrecords), path("${samplename}_call_variants_output.tfrecord.gz"), path(tfgvcf) - + output: tuple val(samplename), path("${samplename}.vcf.gz"), path("${samplename}.vcf.gz.tbi"), path("${samplename}.gvcf.gz"), path("${samplename}.gvcf.gz.tbi") - script: + script: """ postprocess_variants \ --ref $GENOMEREF \ @@ -101,16 +99,15 @@ process deepvariant_step3 { //Combined DeepVariant process deepvariant_combined { - input: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") - + output: tuple val(samplename), path("${samplename}.gvcf.gz"), path("${samplename}.gvcf.gz.tbi"), path("${samplename}.vcf.gz"), path("${samplename}.vcf.gz.tbi") - script: + script: """ run_deepvariant \ --model_type=WGS \ @@ -118,7 +115,7 @@ process deepvariant_combined { --reads=${samplename}.bam \ --output_gvcf= ${samplename}.gvcf.gz \ --output_vcf=${samplename}.vcf.gz \ - --num_shards=16 + --num_shards=16 """ @@ -126,7 +123,7 @@ process deepvariant_combined { """ touch ${samplename}.vcf.gz ${samplename}.vcf.gz.tbi touch ${samplename}.gvcf.gz ${samplename}.gvcf.gz.tbi - + """ @@ -134,20 +131,19 @@ process deepvariant_combined { process glnexus { - input: path(gvcfs) - + output: - tuple path("germline.v.bcf"), + tuple path("germline.v.bcf"), path("germline.norm.vcf.gz"),path("germline.norm.vcf.gz.tbi") - script: + 
script: """ glnexus_cli --config DeepVariant_unfiltered \ *.gvcf.gz --threads 8 > germline.v.bcf - + bcftools norm \ -m - \ -Oz \ @@ -160,18 +156,13 @@ process glnexus { -f -t \ --threads 8 \ germline.norm.vcf.gz - + """ stub: """ touch germline.v.bcf - touch germline.norm.vcf.gz + touch germline.norm.vcf.gz touch germline.norm.vcf.gz.tbi """ } - - - - - diff --git a/modules/local/qc.nf b/modules/local/qc.nf index 82bcc1a..04b8022 100644 --- a/modules/local/qc.nf +++ b/modules/local/qc.nf @@ -13,19 +13,21 @@ ANCESTRY_DB=file(params.genomes[params.genome].somalier_ancestrydb) SCRIPT_PATH_GENDER = file(params.script_genderPrediction) SCRIPT_PATH_SAMPLES = file(params.script_combineSamples) SCRIPT_PATH_PCA = file(params.script_ancestry) - -//OUTPUT DIRECTORY + +//OUTPUT DIRECTORY process fc_lane { + container = "${params.containers.logan}" + label 'process_low' input: tuple val(samplename), path(fqs) - output: + output: tuple val(samplename), - path("${samplename}.fastq.info.txt") - + path("${samplename}.fastq.info.txt") + script: GET_FLOWCELL_LANES=file(params.get_flowcell_lanes) @@ -35,7 +37,7 @@ process fc_lane { ${samplename} > ${samplename}.fastq.info.txt """ - stub: + stub: """ touch ${samplename}.fastq.info.txt """ @@ -61,7 +63,7 @@ process fastq_screen { path("${samplename}.R2.trimmed_screen.png"), path("${samplename}.R2.trimmed_screen.txt") - script: + script: FASTQ_SCREEN_CONF=file(params.fastq_screen_conf) """ @@ -75,7 +77,7 @@ process fastq_screen { """ - stub: + stub: """ touch ${samplename}.R1.trimmed_screen.html ${samplename}.R1.trimmed_screen.png touch ${samplename}.R1.trimmed_screen.txt ${samplename}.R2.trimmed_screen.html @@ -92,11 +94,11 @@ process kraken { @Input: Trimmed FastQ files (scatter) @Output: - Kraken logfile and interative krona report + Kraken logfile and interactive krona report */ - + input: - tuple val(samplename), + tuple val(samplename), path(fqs) output: @@ -104,19 +106,19 @@ process kraken { 
//path("${samplename}.trimmed.kraken_bacteria.out.txt"), path("${samplename}.trimmed.kraken_bacteria.taxa.txt"), path("${samplename}.trimmed.kraken_bacteria.krona.html") - - script: + + script: """ #Setups temporary directory for - #intermediate files with built-in + #intermediate files with built-in #mechanism for deletion on exit - - + + # Copy kraken2 db to local node storage to reduce filesystem strain cp -rv $BACDB . kdb_base=\$(basename $BACDB) - + kraken2 --db $BACDB \ --threads 16 --report ${samplename}.trimmed.kraken_bacteria.taxa.txt \ --output - \ @@ -127,7 +129,7 @@ process kraken { ktImportTaxonomy - -o ${samplename}.trimmed.kraken_bacteria.krona.html """ - stub: + stub: """ touch ${samplename}.trimmed.kraken_bacteria.taxa.txt ${samplename}.trimmed.kraken_bacteria.krona.html """ @@ -155,18 +157,18 @@ process fastqc { //threads: 8 //module=['fastqc/0.11.9'] - script: + script: """ mkdir -p fastqc fastqc -t 8 \ -f bam \ -o fastqc \ - ${samplename}.bqsr.bam + ${samplename}.bqsr.bam mv fastqc/${samplename}.bqsr_fastqc.html ${samplename}_fastqc.html mv fastqc/${samplename}.bqsr_fastqc.zip ${samplename}_fastqc.zip """ - stub: + stub: """ touch ${samplename}_fastqc.html ${samplename}_fastqc.zip """ @@ -174,7 +176,7 @@ process fastqc { process qualimap_bamqc { /* - Quality-control step to assess various post-alignment metrics + Quality-control step to assess various post-alignment metrics and a secondary method to calculate insert size. 
Please see QualiMap's website for more information about BAM QC: http://qualimap.conesalab.org/ @@ -182,15 +184,15 @@ process qualimap_bamqc { Recalibrated BAM file (scatter) @Output: Report containing post-aligment quality-control metrics - */ + */ input: tuple val(samplename), path(bam), path(bai) - output: + output: tuple path("${samplename}_genome_results.txt"), path("${samplename}_qualimapReport.html") - script: + script: """ unset DISPLAY qualimap bamqc -bam ${bam} \ @@ -215,9 +217,9 @@ process qualimap_bamqc { process samtools_flagstats { /* - Quality-control step to assess alignment quality. Flagstat provides - counts for each of 13 categories based primarily on bit flags in the - FLAG field. Information on the meaning of the flags is given in the + Quality-control step to assess alignment quality. Flagstat provides + counts for each of 13 categories based primarily on bit flags in the + FLAG field. Information on the meaning of the flags is given in the SAM specification: https://samtools.github.io/hts-specs/SAMv1.pdf @Input: Recalibrated BAM file (scatter) @@ -225,21 +227,21 @@ process samtools_flagstats { Text file containing alignment statistics */ label 'process_mid' - + input: tuple val(samplename), path(bam), path(bai) - + output: path("${samplename}.samtools_flagstat.txt") - script: + script: """ samtools flagstat ${bam} > ${samplename}.samtools_flagstat.txt """ stub: """ - touch ${samplename}.samtools_flagstat.txt + touch ${samplename}.samtools_flagstat.txt """ } @@ -260,7 +262,7 @@ process mosdepth { */ input: tuple val(samplename), path(bam), path(bai) - + output: path("${samplename}.mosdepth.region.dist.txt"), path("${samplename}.mosdepth.summary.txt"), @@ -268,7 +270,7 @@ process mosdepth { path("${samplename}.regions.bed.gz.csi") - script: + script: """ mosdepth -n --fast-mode --by 500 ${samplename} ${bam} -t $task.cpus """ @@ -282,12 +284,12 @@ process mosdepth { """ } -process vcftools { +process vcftools { /* - Quality-control step to 
calculates a measure of heterozygosity on + Quality-control step to calculates a measure of heterozygosity on a per-individual basis. The inbreeding coefficient, F, is estimated for each individual using a method of moments. Please see VCFtools - documentation for more information: + documentation for more information: https://vcftools.github.io/man_latest.html @Input: Multi-sample gVCF file (indirect-gather-due-to-aggregation) @@ -296,14 +298,14 @@ process vcftools { */ label 'process_mid' - - input: + + input: tuple path(germlinevcf),path(germlinetbi) - output: + output: path("variants_raw_variants.het") - - - script: + + + script: """ vcftools --gzvcf ${germlinevcf} --het --out variants_raw_variants """ @@ -323,16 +325,16 @@ process collectvariantcallmetrics { @Input: Multi-sample gVCF file (indirect-gather-due-to-aggregation) @Output: - Text file containing a collection of metrics relating to snps and indels - */ - input: + Text file containing a collection of metrics relating to snps and indels + */ + input: tuple path(germlinevcf),path(germlinetbi) - - output: + + output: tuple path("raw_variants.variant_calling_detail_metrics"), path("raw_variants.variant_calling_summary_metrics") - + script: """ java -Xmx24g -jar \${PICARDJARPATH}/picard.jar \ @@ -341,7 +343,7 @@ process collectvariantcallmetrics { OUTPUT= "raw_variants" \ DBSNP=$DBSNP Validation_Stringency=SILENT """ - + stub: """ touch raw_variants.variant_calling_detail_metrics raw_variants.variant_calling_summary_metrics @@ -354,9 +356,9 @@ process bcftools_stats { /* Quality-control step to collect summary statistics from bcftools stats. When bcftools stats is run with one VCF file then stats by non-reference - allele frequency, depth distribution, stats by quality and per-sample - counts, singleton statsistics are calculated. Please see bcftools' - documentation for more information: + allele frequency, depth distribution, stats by quality and per-sample + counts, singleton statsistics are calculated. 
Please see bcftools' + documentation for more information: http://samtools.github.io/bcftools/bcftools.html#stats @Input: Per sample gVCF file (scatter) @@ -370,8 +372,8 @@ process bcftools_stats { tuple val(samplename), path("${samplename}.gvcf.gz"),path("${samplename}.gvcf.gz.tbi") output: path("${samplename}.germline.bcftools_stats.txt") - - script: + + script: """ bcftools stats ${samplename}.gvcf.gz > ${samplename}.germline.bcftools_stats.txt """ @@ -385,11 +387,11 @@ process bcftools_stats { process gatk_varianteval { /* - Quality-control step to calculate various quality control metrics from a - variant callset. These metrics include the number of raw or filtered SNP + Quality-control step to calculate various quality control metrics from a + variant callset. These metrics include the number of raw or filtered SNP counts; ratio of transition mutations to transversions; concordance of a particular sample's calls to a genotyping chip; number of s per sample. - Please see GATK's documentation for more information: + Please see GATK's documentation for more information: https://gatk.broadinstitute.org/hc/en-us/articles/360040507171-VariantEval @Input: Per sample gVCF file (scatter) @@ -398,9 +400,9 @@ process gatk_varianteval { */ label 'process_mid' - input: + input: tuple val(samplename), path("${samplename}.gvcf.gz") ,path("${samplename}.gvcf.gz.tbi") - output: + output: path("${samplename}.germline.eval.grp") //params: // rname = "vareval", @@ -410,7 +412,7 @@ process gatk_varianteval { //message: "Running GATK4 VariantEval on '{input.vcf}' input file" //container: config['images']['wes_base'] //threads: 16 - script: + script: """ gatk --java-options '-Xmx12g -XX:ParallelGCThreads=16' VariantEval \ -R $GENOMEREF \ @@ -431,7 +433,7 @@ process snpeff { /* Data processing and quality-control step to annotate variants, predict its functional effects, and collect various summary statistics about variants and - their annotations. 
Please see SnpEff's documentation for more information: + their annotations. Please see SnpEff's documentation for more information: https://pcingola.github.io/SnpEff/ @Input: Per sample gVCF file (scatter) @@ -440,14 +442,14 @@ process snpeff { */ label 'process_mid' - input: + input: tuple val(samplename), path("${samplename}.gvcf.gz"), path("${samplename}.gvcf.gz.tbi") - output: + output: tuple path("${samplename}.germline.snpeff.ann.vcf"), path("${samplename}.germline.snpeff.ann.csv"), path("${samplename}.germline.snpeff.ann.html") - script: + script: """ java -Xmx12g -jar \$SNPEFF_JAR \ -v -canon -c $SNPEFF_CONFIG \ @@ -478,15 +480,15 @@ process somalier_extract { input: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") - output: + output: path("output/${samplename}.somalier") //params: // sites_vcf = config['references']['SOMALIER']['SITES_VCF'], // genomeFasta = config['references']['GENOME'], // rname = 'somalier_extract' //container: config['images']['wes_base'] - script: - """ + script: + """ mkdir -p output somalier extract \ -d output \ @@ -498,7 +500,7 @@ process somalier_extract { stub: """ mkdir -p output - touch output/${samplename}.somalier + touch output/${samplename}.somalier """ } @@ -518,21 +520,21 @@ process somalier_analysis_human { input: path(somalierin) - + output: tuple path("relatedness.pairs.tsv"), path("relatedness.samples.tsv"), path("ancestry.somalier-ancestry.tsv"), path("predicted.genders.tsv"), path("predicted.pairs.tsv"), path("sampleAncestryPCAPlot.html"), path("predictedPairsAncestry.pdf") - + script: - """ + """ echo "Estimating relatedness" somalier relate \ -o "relatedness" \ $somalierin - + echo "Estimating ancestry" somalier ancestry \ -o "ancestry" \ @@ -542,19 +544,19 @@ process somalier_analysis_human { Rscript $SCRIPT_PATH_GENDER \ relatedness.samples.tsv \ - predicted.genders.tsv - + predicted.genders.tsv + Rscript $SCRIPT_PATH_SAMPLES \ relatedness.pairs.tsv \ predicted.pairs.tsv - + Rscript 
$SCRIPT_PATH_PCA \ ancestry.somalier-ancestry.tsv \ predicted.pairs.tsv \ sampleAncestryPCAPlot.html \ predictedPairsAncestry.pdf """ - + stub: """ @@ -581,30 +583,30 @@ process somalier_analysis_mouse { input: path(somalierin) - + output: - tuple path("relatedness.pairs.tsv"), + tuple path("relatedness.pairs.tsv"), path("relatedness.samples.tsv"), path("predicted.genders.tsv"), path("predicted.pairs.tsv") - + script: - """ + """ echo "Estimating relatedness" somalier relate \ -o "relatedness" \ $somalierin - + Rscript $SCRIPT_PATH_GENDER \ relatedness.samples.tsv \ - predicted.genders.tsv - + predicted.genders.tsv + Rscript $SCRIPT_PATH_SAMPLES \ relatedness.pairs.tsv \ predicted.pairs.tsv - + """ - + stub: """ @@ -612,7 +614,7 @@ process somalier_analysis_mouse { touch relatedness.samples.tsv touch predicted.genders.tsv touch predicted.pairs.tsv - + """ } @@ -620,23 +622,23 @@ process multiqc { """ Reporting step to aggregate sample summary statistics and quality-control - information across all samples. This will be one of the last steps of the - pipeline. The inputs listed here are to ensure that this step runs last. - During runtime, MultiQC will recurively crawl through the working directory + information across all samples. This will be one of the last steps of the + pipeline. The inputs listed here are to ensure that this step runs last. + During runtime, MultiQC will recursively crawl through the working directory and parse files that it supports. @Input: List of files to ensure this step runs last (gather) @Output: Interactive MulitQC report and a QC metadata table """ - - input: + + input: path(allqcin) - output: + output: path("MultiQC_Report.html") - script: + script: """ multiqc . 
\ diff --git a/modules/local/trim_align.nf b/modules/local/trim_align.nf index 4fa34db..8a69287 100644 --- a/modules/local/trim_align.nf +++ b/modules/local/trim_align.nf @@ -3,12 +3,13 @@ KNOWNRECAL = params.genomes[params.genome].KNOWNRECAL process fastp { + container = "${params.containers.logan}" label 'process_mid' tag { name } input: tuple val(samplename), path(fqs) - + output: tuple val(samplename), path("${samplename}.R1.trimmed.fastq.gz"), @@ -40,15 +41,16 @@ process fastp { process bwamem2 { + container = "${params.containers.logan}" tag { name } - + input: - tuple val(samplename), + tuple val(samplename), path("${samplename}.R1.trimmed.fastq.gz"), path("${samplename}.R2.trimmed.fastq.gz"), path("${samplename}.fastp.json"), path("${samplename}.fastp.html") - + output: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") @@ -77,9 +79,9 @@ process bwamem2 { process bqsr { /* - Base quality recalibration for all samples - */ - + Base quality recalibration for all samples + */ + container = "${params.containers.logan}" label 'process_low' input: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path(bed) @@ -99,19 +101,20 @@ process bqsr { stub: """ - touch ${samplename}_${bed.simpleName}.recal_data.grp + touch ${samplename}_${bed.simpleName}.recal_data.grp """ } process gatherbqsr { + container = "${params.containers.logan}" label 'process_low' - input: + input: tuple val(samplename), path(recalgroups) output: tuple val(samplename), path("${samplename}.recal_data.grp") script: - + strin = recalgroups.join(" --input ") """ @@ -131,10 +134,11 @@ process gatherbqsr { process applybqsr { /* - Base quality recalibration for all samples to - */ + Base quality recalibration for all samples to + */ + container = "${params.containers.logan}" label 'process_low' - + input: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path("${samplename}.recal_data.grp") @@ -154,7 +158,7 @@ process 
applybqsr { """ stub: - + """ touch ${samplename}.bqsr.bam ${samplename}.bqsr.bai """ @@ -164,12 +168,13 @@ process applybqsr { process samtoolsindex { + container = "${params.containers.logan}" label 'process_mid' - publishDir(path: "${outdir}/bams/BQSR", mode: 'copy') - + publishDir(path: "${outdir}/bams/BQSR", mode: 'copy') + input: tuple val(bamname), path(bam) - + output: tuple val(bamname), path(bam), path("${bam}.bai") @@ -187,9 +192,10 @@ process samtoolsindex { //Save to CRAM for output process bamtocram_tonly { + container = "${params.containers.logan}" label 'process_mid' - - input: + + input: tuple val(tumorname), path(tumor), path(tumorbai) output: @@ -204,27 +210,27 @@ process bamtocram_tonly { /* process indelrealign { - //Briefly, RealignerTargetCreator runs faster with increasing -nt threads, + //Briefly, RealignerTargetCreator runs faster with increasing -nt threads, //while IndelRealigner shows diminishing returns for increasing scatter - + tag { name } - + input: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") output: tuple val(samplename), path("${samplename}.ir.bam") - script: - + script: + """ /usr/bin/java -Xmx32g -jar \${GATK_JAR} -T RealignerTargetCreator \ -I ${samplename}.bam \ -R ${GENOMEREF} \ -o ${samplename}.intervals \ -nt 16 \ - -known ${MILLSINDEL} -known ${SHAPEITINDEL} - + -known ${MILLSINDEL} -known ${SHAPEITINDEL} + /usr/bin/java -Xmx32g -jar \${GATK_JAR} -T IndelRealigner \ -R ${GENOMEREF} \ -I ${samplename}.bam \ @@ -234,11 +240,11 @@ process indelrealign { -targetIntervals ${samplename}.intervals \ -o ${samplename}.ir.bam """ - + stub: """ - touch ${samplename}.ir.bam + touch ${samplename}.ir.bam """ } diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index e23cd0e..c580c2b 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ -1,10 +1,10 @@ GENOMEREF=file(params.genomes[params.genome].genome) 
GENOMEFAI=file(params.genomes[params.genome].genomefai) GENOMEDICT=file(params.genomes[params.genome].genomedict) -KGPGERMLINE=params.genomes[params.genome].kgp -DBSNP=file(params.genomes[params.genome].dbsnp) -GNOMADGERMLINE=params.genomes[params.genome].gnomad -PON=file(params.genomes[params.genome].pon) +KGPGERMLINE=params.genomes[params.genome].kgp +DBSNP=file(params.genomes[params.genome].dbsnp) +GNOMADGERMLINE=params.genomes[params.genome].gnomad +PON=file(params.genomes[params.genome].pon) VEPCACHEDIR=file(params.genomes[params.genome].vepcache) VEPSPECIES=params.genomes[params.genome].vepspecies VEPBUILD=params.genomes[params.genome].vepbuild @@ -15,20 +15,21 @@ LOFREQ_CONVERT=params.lofreq_convert process mutect2 { + container = "${params.containers.logan}" label 'process_somaticcaller' input: tuple val(tumorname), path(tumor), path(tumorbai), - val(normalname), path(normal), path(normalbai), + val(normalname), path(normal), path(normalbai), path(bed) - + output: tuple val(tumorname), val(normalname), path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz"), path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.f1r2.tar.gz"), path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz.stats") - + script: """ gatk Mutect2 \ @@ -55,12 +56,13 @@ process mutect2 { process pileup_paired_t { + container = "${params.containers.logan}" label 'process_highmem' input: tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) - + output: tuple val(tumorname), path("${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table") @@ -71,7 +73,7 @@ process pileup_paired_t { -I ${tumor} \ -V $KGPGERMLINE \ -L ${bed} \ - -O ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table + -O ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table """ @@ -84,12 +86,13 @@ process pileup_paired_t { process pileup_paired_n { + container = "${params.containers.logan}" label 
'process_highmem' input: tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) - + output: tuple val(tumorname), path("${tumor.simpleName}_${bed.simpleName}.normal.pileup.table") @@ -100,10 +103,10 @@ process pileup_paired_n { -I ${normal} \ -V $KGPGERMLINE \ -L ${bed} \ - -O ${tumor.simpleName}_${bed.simpleName}.normal.pileup.table + -O ${tumor.simpleName}_${bed.simpleName}.normal.pileup.table """ - + stub: """ touch ${tumor.simpleName}_${bed.simpleName}.normal.pileup.table @@ -113,13 +116,14 @@ process pileup_paired_n { process contamination_paired { + container = "${params.containers.logan}" label 'process_highmem' input: tuple val(tumorname), path(tumor_pileups), path(normal_pileups) - + output: tuple val(tumorname), path("${tumorname}_allpileups.table"), @@ -137,7 +141,7 @@ process contamination_paired { gatk GatherPileupSummaries \ --sequence-dictionary $GENOMEDICT \ -I ${alltumor} -O ${tumorname}_allpileups.table - + gatk GatherPileupSummaries \ --sequence-dictionary $GENOMEDICT \ -I ${allnormal} -O ${tumorname}_normal.allpileups.table @@ -161,20 +165,21 @@ process contamination_paired { touch ${tumorname}_normal.contamination.table """ - + } process learnreadorientationmodel { + container = "${params.containers.logan}" label 'process_highmem' input: tuple val(sample), path(f1r2) - + output: tuple val(sample), path("${sample}.read-orientation-model.tar.gz") - script: + script: f1r2in = f1r2.join(" --input ") """ @@ -191,15 +196,16 @@ process learnreadorientationmodel { process mergemut2stats { + container = "${params.containers.logan}" label 'process_low' input: tuple val(sample), path(stats) - + output: tuple val(sample), path("${sample}.final.stats") - script: + script: statsin = stats.join(" --stats ") """ @@ -217,17 +223,18 @@ process mergemut2stats { process mutect2filter { + container = "${params.containers.logan}" label 'process_mid' - + input: - tuple val(tumor), val(normal),path(mutvcfs), 
path(stats), path(obs), + tuple val(tumor), val(normal),path(mutvcfs), path(stats), path(obs), path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination) - + output: - tuple val("${tumor}_vs_${normal}"), - path("${tumor}_vs_${normal}.mut2.marked.vcf.gz"), + tuple val("${tumor}_vs_${normal}"), + path("${tumor}_vs_${normal}.mut2.marked.vcf.gz"), path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.tbi"), - path("${tumor}_vs_${normal}.mut2.norm.vcf.gz"), path("${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi"), + path("${tumor}_vs_${normal}.mut2.norm.vcf.gz"), path("${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi"), path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.filteringStats.tsv") script: @@ -247,7 +254,7 @@ process mutect2filter { --variant ${tumor}_vs_${normal}.mut2.marked.vcf.gz \ --exclude-filtered \ --output ${tumor}_vs_${normal}.mut2.final.vcf.gz - + bcftools sort ${tumor}_vs_${normal}.mut2.final.vcf.gz |\ bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ @@ -267,18 +274,19 @@ process mutect2filter { process strelka_tn { + container = "${params.containers.logan}" label 'process_highcpu' input: - tuple val(tumorname), path(tumor), path(tumorbai), + tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) - + output: tuple val(tumorname), val(normalname), path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz"), path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi"), path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz"), path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz.tbi") - + script: """ @@ -296,21 +304,21 @@ process strelka_tn { ./wd/runWorkflow.py -m local -j $task.cpus mv wd/results/variants/somatic.snvs.vcf.gz 
${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.snvs.vcf.gz mv wd/results/variants/somatic.indels.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.indels.vcf.gz - - printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" >sampname - + + printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" >sampname + bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.snvs.vcf.gz \ - | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz + | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.indels.vcf.gz \ - | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz + | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz bcftools index -t ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz - bcftools index -t ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz - + bcftools index -t ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz + """ stub: - + """ touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz.tbi @@ -321,16 +329,17 @@ process strelka_tn { process vardict_tn { + container = "${params.containers.logan}" label 'process_highcpu' input: tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) - + output: tuple val(tumorname), 
val(normalname), path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz") - //bcbio notes of vardict filtering var2vcf_paired.pl -P 0.9 -m 4.25 -f 0.01 -M” and - //filtered with “((AF*DP < 6) && ((MQ < 55.0 && NM > 1.0) || (MQ < 60.0 && NM > 2.0) || (DP < 10) || (QUAL < 45)))” + //bcbio notes of vardict filtering var2vcf_paired.pl -P 0.9 -m 4.25 -f 0.01 -M” and + //filtered with “((AF*DP < 6) && ((MQ < 55.0 && NM > 1.0) || (MQ < 60.0 && NM > 2.0) || (DP < 10) || (QUAL < 45)))” script: """ @@ -350,8 +359,8 @@ process vardict_tn { -S \ -f 0.05 > ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf - printf "${normal.Name}\t${normalname}\n${tumor.Name}\t${tumorname}\n" > sampname - + printf "${normal.Name}\t${normalname}\n${tumor.Name}\t${tumorname}\n" > sampname + bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf \ | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz @@ -359,7 +368,7 @@ process vardict_tn { """ stub: - + """ touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz @@ -370,19 +379,20 @@ process vardict_tn { process varscan_tn { + container = "${params.containers.logan}" label 'process_somaticcaller' input: - tuple val(tumorname), path(tumor), path(tumorbai), + tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed), val(tumor1), - path(tumorpileup), path(normalpileup), + path(tumorpileup), path(normalpileup), path(tumor_con_table), path(normal_con_table) - + output: tuple val(tumorname), val(normalname), path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf.gz") - + shell: ''' tumor_purity=$( echo "1-$(printf '%.6f' $(tail -n -1 !{tumor_con_table} | cut -f2 ))" | bc -l) @@ -402,8 +412,8 @@ process varscan_tn { -R !{GENOMEREF} -SD !{GENOMEDICT} \ -O 
!{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf - printf "NORMAL\t!{normalname}\nTUMOR\t!{tumorname}\n" > sampname - + printf "NORMAL\t!{normalname}\nTUMOR\t!{tumorname}\n" > sampname + bcftools reheader -s sampname !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf \ | bcftools view -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.gz @@ -418,17 +428,18 @@ process varscan_tn { process octopus_tn { + container = "${params.containers.octopus}" //label 'process_highcpu' Using separate docker for octopus input: - tuple val(tumorname), path(tumor), path(tumorbai), + tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) - + output: tuple val("${tumorname}_vs_${normalname}"), path("${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz") - + script: """ @@ -443,32 +454,33 @@ process octopus_tn { """ stub: - + """ touch "${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz" """ -} +} process lofreq_tn { - label 'process_somaticcaller' + container = "${params.containers.logan}" + label 'process_somaticcaller' input: - tuple val(tumorname), path(tumor), path(tumorbai), + tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) - + output: - - tuple val(tumorname), val(normalname), + + tuple val(tumorname), val(normalname), path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.snvs.vcf.gz"), path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz"), path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz"), path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz"), path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz"), path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz.tbi") - + script: """ @@ -478,16 +490,16 @@ process 
lofreq_tn { -l ${bed} \ --call-indels \ -o ${tumorname}_vs_${normalname}_${bed.simpleName}_ - + bcftools concat ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz \ ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz --threads $task.cpus -Oz -o \ ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz $LOFREQ_CONVERT -i ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz -g 1/0 \ -n ${tumorname} -o ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz - + bcftools view -h ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz >temphead - + sed 's/^##FORMAT=/##FORMAT=/' temphead > temphead1 bcftools reheader ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz -h temphead1 |\ bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz @@ -497,71 +509,73 @@ process lofreq_tn { """ stub: - + """ touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.snvs.vcf.gz" touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz" touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz" touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz" touch "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz" "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz.tbi" - + """ -} +} process muse_tn { - label 'process_somaticcaller' + container = "${params.containers.logan}" + label 'process_somaticcaller' input: - tuple val(tumorname), path(tumor), path(tumorbai), + tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai) - + output: tuple val(tumorname), val(normalname), path("${tumorname}_vs_${normalname}.vcf.gz") - + script: """ MuSE call -f $GENOMEREF -O ${tumorname}_vs_${normalname} -n $task.cpus $tumor $normal MuSE sump -I 
${tumorname}_vs_${normalname}.MuSE.txt \ -O ${tumorname}_vs_${normalname}.vcf -n $task.cpus -D $DBSNP -G - + bcftools view ${tumorname}_vs_${normalname}.vcf -Oz -o ${tumorname}_vs_${normalname}_temp.vcf.gz - printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" > sampname - + printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" > sampname + bcftools reheader -s sampname ${tumorname}_vs_${normalname}_temp.vcf.gz \ | bcftools view -Oz -o ${tumorname}_vs_${normalname}.vcf.gz """ stub: - + """ touch "${tumorname}_vs_${normalname}.vcf.gz" """ -} +} process combineVariants { + container = "${params.containers.logan}" label 'process_highmem' input: tuple val(sample), path(inputvcf), val(vc) - + output: - tuple val(sample), - path("${vc}/${sample}.${vc}.marked.vcf.gz"), - path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), + tuple val(sample), + path("${vc}/${sample}.${vc}.marked.vcf.gz"), + path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), path("${vc}/${sample}.${vc}.norm.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi") script: vcfin = inputvcf.join(" -I ") - + """ mkdir ${vc} gatk --java-options "-Xmx48g" SortVcf \ @@ -596,21 +610,22 @@ process combineVariants { process combineVariants_alternative { + container = "${params.containers.logan}" label 'process_highmem' input: tuple val(sample), path(vcfs), path(vcfsindex), val(vc) - + output: - tuple val(sample), - path("${vc}/${sample}.${vc}.marked.vcf.gz"), - path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), + tuple val(sample), + path("${vc}/${sample}.${vc}.marked.vcf.gz"), + path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), path("${vc}/${sample}.${vc}.norm.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi") - + script: vcfin = vcfs.join(" ") - + """ mkdir ${vc} bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp1.vcf.gz @@ -636,13 +651,14 @@ process combineVariants_alternative { touch ${vc}/${sample}.${vc}.norm.vcf.gz touch ${vc}/${sample}.${vc}.marked.vcf.gz.tbi touch ${vc}/${sample}.${vc}.norm.vcf.gz.tbi - + 
""" } process bcftools_index_octopus { + container = "${params.containers.logan}" label 'process_low' input: @@ -651,10 +667,10 @@ process bcftools_index_octopus { output: tuple val(tumor), - path(vcf), + path(vcf), path("${vcf}.tbi") - - script: + + script: """ bcftools index -t ${vcf} """ @@ -670,32 +686,33 @@ process bcftools_index_octopus { process combineVariants_strelka { //Concat all somatic snvs/indels across all files, strelka separates snv/indels + container = "${params.containers.logan}" label 'process_mid' input: - tuple val(sample), + tuple val(sample), path(strelkasnvs), path(snvindex), path(strelkaindels), path(indelindex) - + output: - tuple val(sample), + tuple val(sample), path("${sample}.strelka.vcf.gz"), path("${sample}.strelka.vcf.gz.tbi"), path("${sample}.filtered.strelka.vcf.gz"), path("${sample}.filtered.strelka.vcf.gz.tbi") - - + + script: - + vcfin = strelkasnvs.join(" ") indelsin = strelkaindels.join(" ") """ - bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz -a + bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz -a bcftools norm ${sample}.temp.strelka.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ sed '/^\$/d' > ${sample}.temp1.strelka.vcf.gz - bcftools sort ${sample}.temp1.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz + bcftools sort ${sample}.temp1.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz bcftools view ${sample}.strelka.vcf.gz --threads $task.cpus -f PASS -Oz -o ${sample}.filtered.strelka.vcf.gz @@ -708,16 +725,17 @@ process combineVariants_strelka { """ touch ${sample}.strelka.vcf.gz ${sample}.strelka.vcf.gz.tbi touch ${sample}.filtered.strelka.vcf.gz ${sample}.filtered.strelka.vcf.gz.tbi - + """ } process somaticcombine { + container = "${params.containers.logan}" label 'process_mid' - input: + input: tuple val(tumorsample), val(normal), 
val(callers), path(vcfs), path(vcfindex) @@ -730,7 +748,7 @@ process somaticcombine { script: vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } vcfin2="-V:" + vcfin1.join(" -V:") - + """ java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \ -R $GENOMEREF \ @@ -753,10 +771,11 @@ process somaticcombine { } -process annotvep_tn { +process annotvep_tn { + container = "${params.containers.vcf2maf}" input: - tuple val(tumorsample), val(normalsample), - val(vc), path(tumorvcf), path(vcfindex) + tuple val(tumorsample), val(normalsample), + val(vc), path(tumorvcf), path(vcfindex) output: path("paired/${vc}/${tumorsample}_vs_${normalsample}.maf") @@ -771,15 +790,15 @@ process annotvep_tn { NORM_VCF_ID_ARG="" NSAMPLES=${#VCF_SAMPLE_IDS[@]} if [ $NSAMPLES -gt 1 ]; then - # Assign tumor, normal IDs - # Look through column names and + # Assign tumor, normal IDs + # Look through column names and # see if they match provided IDs for (( i = 0; i < $NSAMPLES; i++ )); do echo "${VCF_SAMPLE_IDS[$i]}" if [ "${VCF_SAMPLE_IDS[$i]}" == !{tumorsample} ]; then TID_IDX=$i fi - + if [ "${VCF_SAMPLE_IDS[$i]}" == !{normalsample} ]; then NID_IDX=$i fi @@ -791,9 +810,9 @@ process annotvep_tn { fi fi VCF_TID=${VCF_SAMPLE_IDS[$TID_IDX]} - + zcat !{tumorvcf} > !{tumorvcf.baseName} - + mkdir -p paired/!{vc} vcf2maf.pl \ @@ -817,10 +836,12 @@ process annotvep_tn { process combinemafs_tn { + container = "${params.containers.logan}" + label 'process_low' publishDir(path: "${outdir}/mafs/paired", mode: 'copy') - input: + input: path(allmafs) output: @@ -840,5 +861,3 @@ process combinemafs_tn { touch final_tn.maf """ } - - diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf index 9f8bf93..bdc5731 100644 --- a/modules/local/variant_calling_tonly.nf +++ b/modules/local/variant_calling_tonly.nf @@ -1,10 +1,10 @@ GENOMEREF=file(params.genomes[params.genome].genome) GENOMEFAI=file(params.genomes[params.genome].genomefai) 
GENOMEDICT=file(params.genomes[params.genome].genomedict) -KGPGERMLINE=params.genomes[params.genome].kgp -DBSNP=file(params.genomes[params.genome].dbsnp) -GNOMADGERMLINE=params.genomes[params.genome].gnomad -PON=file(params.genomes[params.genome].pon) +KGPGERMLINE=params.genomes[params.genome].kgp +DBSNP=file(params.genomes[params.genome].dbsnp) +GNOMADGERMLINE=params.genomes[params.genome].gnomad +PON=file(params.genomes[params.genome].pon) VEPCACHEDIR=file(params.genomes[params.genome].vepcache) VEPSPECIES=params.genomes[params.genome].vepspecies VEPBUILD=params.genomes[params.genome].vepbuild @@ -13,11 +13,13 @@ GERMLINE_FOREST=params.genomes[params.genome].octopus_gforest process pileup_paired_tonly { + container = "${params.containers.logan}" + label 'process_highmem' input: tuple val(tumorname), path(tumor), path(tumorbai), path(bed) - + output: tuple val(tumorname), path("${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table") @@ -29,7 +31,7 @@ process pileup_paired_tonly { -I ${tumor} \ -V $KGPGERMLINE \ -L ${bed} \ - -O ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table + -O ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table """ @@ -43,6 +45,8 @@ process pileup_paired_tonly { process contamination_tumoronly { + container = "${params.containers.logan}" + label 'process_highmem' input: @@ -63,7 +67,7 @@ process contamination_tumoronly { gatk GatherPileupSummaries \ --sequence-dictionary $GENOMEDICT \ -I ${alltumor} -O ${tumorname}_allpileups.table - + gatk CalculateContamination \ -I ${tumorname}_allpileups.table \ -O ${tumorname}.contamination.table @@ -81,15 +85,17 @@ process contamination_tumoronly { process learnreadorientationmodel_tonly { + container = "${params.containers.logan}" + label 'process_highmem' input: tuple val(sample), path(f1r2) - + output: tuple val(sample), path("${sample}.read-orientation-model.tar.gz") - script: + script: f1r2in = f1r2.join(" --input ") """ @@ -109,15 +115,17 @@ process learnreadorientationmodel_tonly { 
process mergemut2stats_tonly { + container = "${params.containers.logan}" + label 'process_low' input: tuple val(sample), path(stats) - + output: tuple val(sample), path("${sample}.final.stats") - script: + script: statsin = stats.join(" --stats ") """ @@ -136,16 +144,18 @@ process mergemut2stats_tonly { process mutect2_t_tonly { + container = "${params.containers.logan}" + label 'process_somaticcaller' input: tuple val(tumorname), path(tumor), path(tumorbai), path(bed) - + output: tuple val(tumorname), path("${tumor.simpleName}_${bed.simpleName}.tonly.mut2.vcf.gz"), path("${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz"), path("${tumor.simpleName}_${bed.simpleName}.tonly.mut2.vcf.gz.stats") - + script: """ @@ -158,7 +168,7 @@ process mutect2_t_tonly { --panel-of-normals $PON \ --output ${tumor.simpleName}_${bed.simpleName}.tonly.mut2.vcf.gz \ --f1r2-tar-gz ${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz \ - --independent-mates + --independent-mates """ stub: @@ -174,14 +184,16 @@ process mutect2_t_tonly { process mutect2filter_tonly { + container = "${params.containers.logan}" + label 'process_mid' input: tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups),path(tumorcontamination) output: - tuple val(sample), - path("${sample}.tonly.mut2.marked.vcf.gz"),path("${sample}.tonly.mut2.marked.vcf.gz.tbi"), - path("${sample}.tonly.mut2.norm.vcf.gz"),path("${sample}.tonly.mut2.norm.vcf.gz.tbi"), + tuple val(sample), + path("${sample}.tonly.mut2.marked.vcf.gz"),path("${sample}.tonly.mut2.marked.vcf.gz.tbi"), + path("${sample}.tonly.mut2.norm.vcf.gz"),path("${sample}.tonly.mut2.norm.vcf.gz.tbi"), path("${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv") script: @@ -190,8 +202,8 @@ process mutect2filter_tonly { """ - gatk GatherVcfs -I ${mut2in} -O ${sample}.tonly.concat.vcf.gz - gatk IndexFeatureFile -I ${sample}.tonly.concat.vcf.gz + gatk GatherVcfs -I ${mut2in} -O ${sample}.tonly.concat.vcf.gz + gatk IndexFeatureFile -I 
${sample}.tonly.concat.vcf.gz gatk FilterMutectCalls \ -R $GENOMEREF \ -V ${sample}.tonly.concat.vcf.gz \ @@ -225,16 +237,18 @@ process mutect2filter_tonly { process varscan_tonly { + container = "${params.containers.logan}" + label 'process_somaticcaller' input: - tuple val(tumorname), path(tumor), path(tumorbai), + tuple val(tumorname), path(tumor), path(tumorbai), path(bed), path(tumorpileup), path(tumor_con_table) - + output: tuple val(tumorname), path("${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf.gz") - + shell: ''' @@ -247,8 +261,8 @@ process varscan_tonly { awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf_temp \ | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf - printf "TUMOR\t!{tumorname}\n" > sampname - + printf "TUMOR\t!{tumorname}\n" > sampname + bcftools reheader -s sampname !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf \ | bcftools view -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf.gz @@ -263,14 +277,16 @@ process varscan_tonly { process vardict_tonly { + container = "${params.containers.logan}" + label 'process_highcpu' input: tuple val(tumorname), path(tumor), path(tumorbai), path(bed) - + output: tuple val(tumorname), path("${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz") - + script: """ @@ -291,15 +307,15 @@ process vardict_tonly { -E \ -f 0.05 > ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf - printf "${tumor.Name}\t${tumorname}\n" > sampname - + printf "${tumor.Name}\t${tumorname}\n" > sampname + bcftools reheader -s sampname ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf \ | bcftools view -Oz -o ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz """ stub: - + """ touch ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz @@ -309,15 +325,16 @@ process vardict_tonly { process octopus_tonly { + container = "${params.containers.octopus}" //label 
'process_highcpu' input: tuple val(tumorname), path(tumor), path(tumorbai), path(bed) - + output: tuple val(tumorname), path("${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz") - + script: """ @@ -332,7 +349,7 @@ process octopus_tonly { """ stub: - + """ touch ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz """ @@ -341,11 +358,13 @@ process octopus_tonly { process somaticcombine_tonly { + container = "${params.containers.logan}" + label 'process_mid' publishDir(path: "${outdir}/vcfs/combined_tonly", mode: 'copy') - input: - tuple val(tumorsample), + input: + tuple val(tumorsample), val(callers), path(vcfs), path(vcfindex) @@ -376,11 +395,13 @@ process somaticcombine_tonly { } process annotvep_tonly { + container = "${params.containers.vcf2maf}" + publishDir("${outdir}/mafs", mode: "copy") input: - tuple val(tumorsample), - val(vc), path(tumorvcf), + tuple val(tumorsample), + val(vc), path(tumorvcf), path(vcfindex) @@ -397,15 +418,15 @@ process annotvep_tonly { NORM_VCF_ID_ARG="" NSAMPLES=${#VCF_SAMPLE_IDS[@]} if [ $NSAMPLES -gt 1 ]; then - # Assign tumor, normal IDs - # Look through column names and + # Assign tumor, normal IDs + # Look through column names and # see if they match provided IDs for (( i = 0; i < $NSAMPLES; i++ )); do echo "${VCF_SAMPLE_IDS[$i]}" if [ "${VCF_SAMPLE_IDS[$i]}" == !{tumorsample} ]; then TID_IDX=$i fi - + done if [ ! -z $NID_IDX ]; then @@ -414,9 +435,9 @@ process annotvep_tonly { fi fi VCF_TID=${VCF_SAMPLE_IDS[$TID_IDX]} - + zcat !{tumorvcf} > !{tumorvcf.baseName} - + mkdir -p tumor_only/!{vc} vcf2maf.pl \ @@ -439,9 +460,10 @@ process annotvep_tonly { } process combinemafs_tonly { + container = "${params.containers.logan}" label 'process_low' - input: + input: path(allmafs) output: @@ -449,7 +471,7 @@ process combinemafs_tonly { shell: mafin= allmafs.join(" ") - + """ echo "Combining MAFs..." 
head -2 ${allmafs[0]} > final_tonly.maf @@ -461,6 +483,3 @@ process combinemafs_tonly { touch final_tonly.maf """ } - - - diff --git a/nextflow.config b/nextflow.config index 3a3392d..6f66adb 100644 --- a/nextflow.config +++ b/nextflow.config @@ -7,6 +7,7 @@ manifest { mainScript = "main.nf" } + includeConfig 'conf/containers.config' includeConfig 'conf/genomes.config' includeConfig 'conf/base.config' @@ -73,7 +74,7 @@ profiles { cacheDir = "$PWD/singularity" envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID' runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' - } + } biowulf { includeConfig 'conf/biowulf.config' } From a06fa5d0428dbe4b97b6570415c86810334eb372 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 9 Jan 2024 20:06:17 -0500 Subject: [PATCH 39/58] fix: bam cnv mode --- main.nf | 47 +++++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/main.nf b/main.nf index 2c8367b..0a715e1 100644 --- a/main.nf +++ b/main.nf @@ -5,12 +5,12 @@ date = new Date().format( 'yyyyMMdd' ) log.info """\ - L O G A N P I P E L I N E + L O G A N P I P E L I N E ============================= genome: ${params.genome} outdir: ${params.outdir} Sample Sheet: ${params.sample_sheet} - Samples: ${params.fastq_input} ${params.fastq_file_input} ${params.bam_input} ${params.bam_file_input} + Samples: ${params.fastq_input} ${params.fastq_file_input} ${params.bam_input} ${params.bam_file_input} """ .stripIndent() @@ -49,10 +49,10 @@ workflow { //Tumor-Normal VC, SV, CNV if (params.vc){ VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet) - } + } if (params.sv){ SV(ALIGN.out.bamwithsample) - } + } if (params.cnv){ if (params.genome == "mm10"){ CNVmouse(ALIGN.out.bamwithsample) @@ -64,25 +64,25 @@ workflow { CNVhuman(ALIGN.out.bamwithsample,VC.out.somaticcall_input) } } - } + } if (params.qc && 
params.gl){ QC_GL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout,GL.out.glnexusout,GL.out.bcfout) } else if (params.qc){ QC_NOGL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout) - } + } } - + //TUMOR-NOMRAL BAM INPUT if ([params.bam_input,params.bam_file_input].any() && params.sample_sheet){ println "Tumor-Normal with BAMs" INPUT_BAM() if (params.vc){ VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet) - } + } if (params.sv){ SV(INPUT_BAM.out.bamwithsample) - } + } if (params.cnv){ if (params.genome == "mm10"){ CNVmouse(INPUT_BAM.out.bamwithsample) @@ -90,13 +90,13 @@ workflow { if (!params.vc){ VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet) CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input) - }else { + }else { CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input) } } } - } - + } + ///Tumor Only Pipelines if ([params.fastq_input,params.file_input].any() && !params.sample_sheet){ println "Tumor-Only FASTQ" @@ -115,7 +115,7 @@ workflow { if (!params.vc){ VC_TONLY(ALIGN_TONLY.out.bamwithsample,ALIGN_TONLY.out.splitout,ALIGN_TONLY.out.sample_sheet) CNVhuman_tonly(ALIGN_TONLY.out.bamwithsample,VC_TONLY.out.somaticcall_input) - } else{ + } else { CNVhuman_tonly(ALIGN_TONLY.out.bamwithsample,VC_TONLY.out.somaticcall_input) } } @@ -134,20 +134,19 @@ workflow { } if (params.sv){ SV_TONLY(INPUT_TONLY_BAM.out.bamwithsample) - } + } if (params.cnv){ if (params.genome == "mm10"){ CNVmouse_tonly(INPUT_TONLY_BAM.out.bamwithsample) } else if (params.genome== "hg38"){ - VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet) - CNVhuman_tonly(INPUT_TONLY_BAM.out.bamwithsample,VC_TONLY.out.somaticcall_input) - } - } - - } + if (!params.vc){ + VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet) + CNVhuman_tonly(INPUT_TONLY_BAM.out.bamwithsample,VC_TONLY.out.somaticcall_input) + } 
else { + CNVhuman_tonly(INPUT_TONLY_BAM.out.bamwithsample,VC_TONLY.out.somaticcall_input) + } + } + } + } } - - - - From 1484caea5a91395c92b4a233165e3affe5fdae25 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 10 Jan 2024 10:58:19 -0500 Subject: [PATCH 40/58] fix: update task (cpu/mem) profiles and output dirs --- conf/base.config | 11 +++++++++ conf/modules.config | 12 ++++++---- modules/local/copynumber.nf | 20 +++++----------- modules/local/qc.nf | 10 ++++---- modules/local/structural_variant.nf | 32 ++++++++++++-------------- modules/local/trim_align.nf | 7 +++--- modules/local/variant_calling.nf | 10 ++++---- modules/local/variant_calling_tonly.nf | 9 +++----- 8 files changed, 56 insertions(+), 55 deletions(-) diff --git a/conf/base.config b/conf/base.config index c525125..e6f151c 100644 --- a/conf/base.config +++ b/conf/base.config @@ -51,6 +51,16 @@ process { withLabel:process_high_memory { memory = { check_max( 200.GB * task.attempt, 'memory' ) } } + withLabel:process_somaticcaller { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 64.GB * task.attempt, 'memory' ) } + time = { check_max( 72.h * task.attempt, 'time' ) } + } + withLabel:process_highmem { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 48.GB * task.attempt, 'memory' ) } + time = { check_max( 72.h * task.attempt, 'time' ) } + } withLabel:error_ignore { errorStrategy = 'ignore' } @@ -58,4 +68,5 @@ process { errorStrategy = 'retry' maxRetries = 2 } + } diff --git a/conf/modules.config b/conf/modules.config index a5ff333..f3b5495 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -9,7 +9,6 @@ process { errorStrategy = 'finish' withName:'sequenza' { - container = 'dnousome/ccbr_logan_base:v0.3.3' publishDir = [ path: { "${params.outdir}/cnv/sequenza" }, mode: 'copy' @@ -22,12 +21,9 @@ process { path: { "${params.outdir}/cnv/freec_paired" }, mode: 'copy' ] - container = 'dnousome/ccbr_logan_base:v0.3.3' - } withName:'freec' { - 
container = 'dnousome/ccbr_logan_base:v0.3.3' publishDir = [ path: { "${params.outdir}/cnv/freec_unpaired" }, mode: 'copy' @@ -292,4 +288,12 @@ process { mode: 'copy' ] } + + + withName: 'octopus_tn|octopus_tonly' { + memory = 72.GB + time = 24.h + cpus = 16 + } + } diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf index e5e0691..0f4a92b 100644 --- a/modules/local/copynumber.nf +++ b/modules/local/copynumber.nf @@ -127,9 +127,7 @@ process sequenza { process freec_paired { container = "${params.containers.logan}" - label 'process_highcpu' - publishDir("${outdir}/cnv/freec_paired", mode: 'copy') input: tuple val(tumorname), path(tumor), path(tumorbai), @@ -194,9 +192,7 @@ process freec_paired { process freec { container = "${params.containers.logan}" - - label 'process_mid' - publishDir("${outdir}/cnv/freec_unpaired", mode: 'copy') + label 'process_medium' input: tuple val(tumorname), path(tumor), path(tumorbai) @@ -260,7 +256,7 @@ process freec { process amber_tonly { container = "${params.containers.logan}" - label 'process_mid' + label 'process_medium' input: tuple val(tumorname), path(tumor), path(tumorbai) @@ -297,7 +293,7 @@ process amber_tonly { process amber_tn { container = "${params.containers.logan}" - label 'process_mid' + label 'process_medium' input: tuple val(tumorname), path(tumor), path(tumorbai), @@ -335,7 +331,7 @@ process amber_tn { process cobalt_tonly { container = "${params.containers.logan}" - label "process_mid" + label 'process_medium' input: tuple val(tumorname), path(tumor), path(tumorbai) @@ -370,7 +366,7 @@ process cobalt_tonly { process cobalt_tn { container = "${params.containers.logan}" - label "process_mid" + label 'process_medium' input: tuple val(tumorname), path(tumor), path(tumorbai), @@ -407,9 +403,7 @@ process cobalt_tn { process purple { container = "${params.containers.logan}" - - label 'process_mid' - publishDir("${outdir}/cnv/purple", mode: 'copy') + label 'process_medium' input: tuple val(tumorname), 
@@ -451,8 +445,6 @@ process purple { process ascat_tn { module=["java/12.0.1","R/3.6.3"] - publishDir("${outdir}/purple", mode: 'copy') - input: tuple val(samplename), path(cobaltin), path(amberin), path("${samplename}.tonly.final.mut2.vcf.gz") diff --git a/modules/local/qc.nf b/modules/local/qc.nf index 04b8022..f8bbc89 100644 --- a/modules/local/qc.nf +++ b/modules/local/qc.nf @@ -226,7 +226,7 @@ process samtools_flagstats { @Output: Text file containing alignment statistics */ - label 'process_mid' + label 'process_medium' input: tuple val(samplename), path(bam), path(bai) @@ -296,7 +296,7 @@ process vcftools { @Output: Text file containing a measure of heterozygosity */ - label 'process_mid' + label 'process_medium' input: @@ -366,7 +366,7 @@ process bcftools_stats { Text file containing a collection of summary statistics */ - label 'process_mid' + label 'process_medium' input: tuple val(samplename), path("${samplename}.gvcf.gz"),path("${samplename}.gvcf.gz.tbi") @@ -398,7 +398,7 @@ process gatk_varianteval { @Output: Evaluation table containing a collection of summary statistics */ - label 'process_mid' + label 'process_medium' input: tuple val(samplename), path("${samplename}.gvcf.gz") ,path("${samplename}.gvcf.gz.tbi") @@ -440,7 +440,7 @@ process snpeff { @Output: Evaluation table containing a collection of summary statistics */ - label 'process_mid' + label 'process_medium' input: tuple val(samplename), path("${samplename}.gvcf.gz"), path("${samplename}.gvcf.gz.tbi") diff --git a/modules/local/structural_variant.nf b/modules/local/structural_variant.nf index a6f58f4..dda67c7 100644 --- a/modules/local/structural_variant.nf +++ b/modules/local/structural_variant.nf @@ -1,7 +1,7 @@ GENOMEREF=file(params.genomes[params.genome].genome) GENOME=params.genome BWAGENOME=file(params.genomes[params.genome].bwagenome) -DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) +DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) @@ -10,7 +10,7 @@ process 
svaba_somatic { input: tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai) - + output: tuple val(tumorname), path("${tumor.simpleName}.bps.txt.gz"), @@ -34,7 +34,7 @@ process svaba_somatic { """ stub: - + """ touch "${tumor.simpleName}.bps.txt.gz" touch "${tumor.simpleName}.contigs.bam" @@ -61,7 +61,7 @@ process manta_somatic { input: tuple val(tumorname), path(tumor), path(tumorbai),val(normalname), path(normal), path(normalbai) - + output: tuple val(tumorname), path("${tumor.simpleName}.diplodSV.vcf.gz"), @@ -80,7 +80,7 @@ process manta_somatic { --runDir=wd wd/runWorkflow.py -m local -j 10 -g 10 - + mv wd/results/variants/diploidSV.vcf.gz ${tumor.simpleName}.diplodSV.vcf.gz mv wd/results/variants/somaticSV.vcf.gz ${tumor.simpleName}.somaticSV.vcf.gz mv wd/results/variants/candidateSV.vcf.gz ${tumor.simpleName}.candidateSV.vcf.gz @@ -89,7 +89,7 @@ process manta_somatic { """ stub: - + """ touch ${tumor.simpleName}.diplodSV.vcf.gz touch ${tumor.simpleName}.somaticSV.vcf.gz @@ -140,7 +140,7 @@ process manta_tonly { input: tuple val(tumorname), path(tumor), path(tumorbai) - + output: tuple val(tumorname), path("${tumor.simpleName}.candidateSV.vcf.gz"), @@ -158,7 +158,7 @@ process manta_tonly { --runDir=wd wd/runWorkflow.py -m local -j 10 -g 10 - + mv wd/results/variants/candidateSV.vcf.gz ${tumor.simpleName}.candidateSV.vcf.gz mv wd/results/variants/candidateSmallIndels.vcf.gz ${tumor.simpleName}.candidateSmallIndels.vcf.gz mv wd/results/variants/tumorSV.vcf.gz ${tumor.simpleName}.tumorSV.vcf.gz @@ -166,7 +166,7 @@ process manta_tonly { """ stub: - + """ touch ${tumor.simpleName}.candidateSV.vcf.gz touch ${tumor.simpleName}.candidateSmallIndels.vcf.gz @@ -182,7 +182,7 @@ process svaba_tonly { input: tuple val(tumorname), path(tumor), path(tumorbai) - + output: tuple val(tumorname), path("${tumor.simpleName}.bps.txt.gz"), @@ -202,7 +202,7 @@ process svaba_tonly { """ stub: - + """ touch "${tumor.simpleName}.bps.txt.gz" 
touch "${tumor.simpleName}.contigs.bam" @@ -221,11 +221,11 @@ process svaba_tonly { process gunzip { input: - tuple val(tumorname), + tuple val(tumorname), path(vcf), val(sv) output: - tuple val(tumorname), + tuple val(tumorname), path("${tumorname}.tumorSV.vcf"), val(sv) script: @@ -246,7 +246,7 @@ process survivor_sv { module = ['survivor'] input: - tuple val(tumorname), + tuple val(tumorname), path(vcfs),val(svs) output: @@ -275,9 +275,7 @@ process survivor_sv { process annotsv_tonly { //AnnotSV for Manta/Svaba works with either vcf.gz or .vcf files //Requires bedtools,bcftools - module = ['annotsv/3.3.1'] - publishDir(path: "${outdir}/SV/annotated_tonly", mode: 'copy') input: tuple val(tumorname), path(somaticvcf), val(sv) @@ -306,4 +304,4 @@ process annotsv_tonly { touch "${sv}/${tumorname}.tsv" touch "${sv}/${tumorname}.unannotated.tsv" """ -} \ No newline at end of file +} diff --git a/modules/local/trim_align.nf b/modules/local/trim_align.nf index 8a69287..1ceb063 100644 --- a/modules/local/trim_align.nf +++ b/modules/local/trim_align.nf @@ -4,7 +4,7 @@ KNOWNRECAL = params.genomes[params.genome].KNOWNRECAL process fastp { container = "${params.containers.logan}" - label 'process_mid' + label 'process_medium' tag { name } input: @@ -169,8 +169,7 @@ process applybqsr { process samtoolsindex { container = "${params.containers.logan}" - label 'process_mid' - publishDir(path: "${outdir}/bams/BQSR", mode: 'copy') + label 'process_medium' input: tuple val(bamname), path(bam) @@ -193,7 +192,7 @@ process samtoolsindex { //Save to CRAM for output process bamtocram_tonly { container = "${params.containers.logan}" - label 'process_mid' + label 'process_medium' input: tuple val(tumorname), path(tumor), path(tumorbai) diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index c580c2b..1d8e560 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ -224,7 +224,7 @@ process mergemut2stats { process mutect2filter { 
container = "${params.containers.logan}" - label 'process_mid' + label 'process_medium' input: tuple val(tumor), val(normal),path(mutvcfs), path(stats), path(obs), @@ -687,7 +687,7 @@ process bcftools_index_octopus { process combineVariants_strelka { //Concat all somatic snvs/indels across all files, strelka separates snv/indels container = "${params.containers.logan}" - label 'process_mid' + label 'process_medium' input: tuple val(sample), @@ -733,7 +733,7 @@ process combineVariants_strelka { process somaticcombine { container = "${params.containers.logan}" - label 'process_mid' + label 'process_medium' input: tuple val(tumorsample), val(normal), @@ -772,7 +772,9 @@ process somaticcombine { process annotvep_tn { + label 'process_medium' container = "${params.containers.vcf2maf}" + input: tuple val(tumorsample), val(normalsample), val(vc), path(tumorvcf), path(vcfindex) @@ -837,9 +839,7 @@ process annotvep_tn { process combinemafs_tn { container = "${params.containers.logan}" - label 'process_low' - publishDir(path: "${outdir}/mafs/paired", mode: 'copy') input: path(allmafs) diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf index bdc5731..02a3435 100644 --- a/modules/local/variant_calling_tonly.nf +++ b/modules/local/variant_calling_tonly.nf @@ -186,7 +186,7 @@ process mutect2_t_tonly { process mutect2filter_tonly { container = "${params.containers.logan}" - label 'process_mid' + label 'process_medium' input: tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups),path(tumorcontamination) @@ -359,9 +359,7 @@ process octopus_tonly { process somaticcombine_tonly { container = "${params.containers.logan}" - - label 'process_mid' - publishDir(path: "${outdir}/vcfs/combined_tonly", mode: 'copy') + label 'process_medium' input: tuple val(tumorsample), @@ -396,8 +394,7 @@ process somaticcombine_tonly { process annotvep_tonly { container = "${params.containers.vcf2maf}" - - publishDir("${outdir}/mafs", mode: 
"copy") + label 'process_medium' input: tuple val(tumorsample), From 8b9fb63e969a1bed786d0f3a95a03f6358f47ff0 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 10 Jan 2024 13:22:16 -0500 Subject: [PATCH 41/58] fix: increase mem for vcs --- conf/base.config | 5 +++++ conf/modules.config | 7 ------- modules/local/variant_calling.nf | 2 +- modules/local/variant_calling_tonly.nf | 2 +- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/conf/base.config b/conf/base.config index e6f151c..0eb1dee 100644 --- a/conf/base.config +++ b/conf/base.config @@ -56,6 +56,11 @@ process { memory = { check_max( 64.GB * task.attempt, 'memory' ) } time = { check_max( 72.h * task.attempt, 'time' ) } } + withLabel:process_somaticcaller_high { + cpus = { check_max( 16 * task.attempt, 'cpus' ) } + memory = { check_max( 70.GB * task.attempt, 'memory' ) } + time = { check_max( 48.h * task.attempt, 'time' ) } + } withLabel:process_highmem { cpus = { check_max( 4 * task.attempt, 'cpus' ) } memory = { check_max( 48.GB * task.attempt, 'memory' ) } diff --git a/conf/modules.config b/conf/modules.config index f3b5495..a9bb769 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -289,11 +289,4 @@ process { ] } - - withName: 'octopus_tn|octopus_tonly' { - memory = 72.GB - time = 24.h - cpus = 16 - } - } diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index 1d8e560..2b87dfa 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ -429,7 +429,7 @@ process varscan_tn { process octopus_tn { container = "${params.containers.octopus}" - //label 'process_highcpu' Using separate docker for octopus + label 'process_somaticcaller_high' input: tuple val(tumorname), path(tumor), path(tumorbai), diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf index 02a3435..9324011 100644 --- a/modules/local/variant_calling_tonly.nf +++ b/modules/local/variant_calling_tonly.nf @@ -326,7 +326,7 @@ 
process vardict_tonly { process octopus_tonly { container = "${params.containers.octopus}" - //label 'process_highcpu' + label 'process_somaticcaller_high' input: tuple val(tumorname), path(tumor), path(tumorbai), path(bed) From 99145a82fb3c06fc321fb246f0a50046d201d87d Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 10 Jan 2024 22:03:35 -0500 Subject: [PATCH 42/58] fix: vc cpus --- modules/local/variant_calling.nf | 40 +++++++++++++------------- modules/local/variant_calling_tonly.nf | 36 +++++++++++------------ 2 files changed, 36 insertions(+), 40 deletions(-) diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index 2b87dfa..2d01e01 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ -15,7 +15,7 @@ LOFREQ_CONVERT=params.lofreq_convert process mutect2 { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_somaticcaller' input: @@ -56,7 +56,7 @@ process mutect2 { process pileup_paired_t { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_highmem' input: @@ -86,7 +86,7 @@ process pileup_paired_t { process pileup_paired_n { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_highmem' input: @@ -116,7 +116,7 @@ process pileup_paired_n { process contamination_paired { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_highmem' input: @@ -170,7 +170,7 @@ process contamination_paired { process learnreadorientationmodel { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_highmem' input: @@ -196,7 +196,7 @@ process learnreadorientationmodel { process mergemut2stats { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_low' input: @@ -223,7 +223,7 @@ process mergemut2stats { process mutect2filter { - container = 
"${params.containers.logan}" + container "${params.containers.logan}" label 'process_medium' input: @@ -274,7 +274,7 @@ process mutect2filter { process strelka_tn { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_highcpu' input: tuple val(tumorname), path(tumor), path(tumorbai), @@ -329,7 +329,7 @@ process strelka_tn { process vardict_tn { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_highcpu' input: @@ -379,7 +379,7 @@ process vardict_tn { process varscan_tn { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_somaticcaller' input: @@ -428,7 +428,7 @@ process varscan_tn { process octopus_tn { - container = "${params.containers.octopus}" + container "${params.containers.octopus}" label 'process_somaticcaller_high' input: @@ -463,7 +463,7 @@ process octopus_tn { process lofreq_tn { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_somaticcaller' input: @@ -523,7 +523,7 @@ process lofreq_tn { process muse_tn { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_somaticcaller' input: tuple val(tumorname), path(tumor), path(tumorbai), @@ -560,7 +560,7 @@ process muse_tn { process combineVariants { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_highmem' input: @@ -610,7 +610,7 @@ process combineVariants { process combineVariants_alternative { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_highmem' input: @@ -658,7 +658,7 @@ process combineVariants_alternative { process bcftools_index_octopus { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_low' input: @@ -686,7 +686,7 @@ process bcftools_index_octopus { process combineVariants_strelka { //Concat all somatic snvs/indels 
across all files, strelka separates snv/indels - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_medium' input: @@ -732,7 +732,7 @@ process combineVariants_strelka { process somaticcombine { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_medium' input: @@ -773,7 +773,7 @@ process somaticcombine { process annotvep_tn { label 'process_medium' - container = "${params.containers.vcf2maf}" + container "${params.containers.vcf2maf}" input: tuple val(tumorsample), val(normalsample), @@ -838,7 +838,7 @@ process annotvep_tn { process combinemafs_tn { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_low' input: diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf index 9324011..fb4253e 100644 --- a/modules/local/variant_calling_tonly.nf +++ b/modules/local/variant_calling_tonly.nf @@ -13,7 +13,7 @@ GERMLINE_FOREST=params.genomes[params.genome].octopus_gforest process pileup_paired_tonly { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_highmem' @@ -45,7 +45,7 @@ process pileup_paired_tonly { process contamination_tumoronly { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_highmem' @@ -85,7 +85,7 @@ process contamination_tumoronly { process learnreadorientationmodel_tonly { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_highmem' @@ -115,7 +115,7 @@ process learnreadorientationmodel_tonly { process mergemut2stats_tonly { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_low' @@ -144,9 +144,9 @@ process mergemut2stats_tonly { process mutect2_t_tonly { - container = "${params.containers.logan}" - + container "${params.containers.logan}" label 'process_somaticcaller' + input: tuple 
val(tumorname), path(tumor), path(tumorbai), path(bed) @@ -184,7 +184,7 @@ process mutect2_t_tonly { process mutect2filter_tonly { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_medium' @@ -219,7 +219,7 @@ process mutect2filter_tonly { --output ${sample}.tonly.mut2.final.vcf.gz bcftools sort ${sample}.tonly.mut2.final.vcf.gz |\ - bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ + bcftools norm --threads ${task.cpus} --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ sed '/^\$/d' |\ bcftools view - -Oz -o ${sample}.tonly.mut2.norm.vcf.gz @@ -237,7 +237,7 @@ process mutect2filter_tonly { process varscan_tonly { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_somaticcaller' input: @@ -277,7 +277,7 @@ process varscan_tonly { process vardict_tonly { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_highcpu' input: @@ -297,7 +297,7 @@ process vardict_tonly { -x 500 \ --nosv \ -b ${tumor} --fisher \ - -t -Q 20 -c 1 -S 2 -E 3 --th $task.cpus \ + -t -Q 20 -c 1 -S 2 -E 3 --th ${task.cpus} \ temp_${bed} | var2vcf_valid.pl \ -N ${tumor} \ -Q 20 \ @@ -325,7 +325,7 @@ process vardict_tonly { process octopus_tonly { - container = "${params.containers.octopus}" + container "${params.containers.octopus}" label 'process_somaticcaller_high' input: @@ -336,20 +336,16 @@ process octopus_tonly { path("${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz") script: - """ octopus -R $GENOMEREF -C cancer -I ${tumor} \ --annotations AC AD DP \ --target-working-memory 64Gb \ -t ${bed} \ $SOMATIC_FOREST \ - -o ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz --threads $task.cpus - - + -o ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz --threads ${task.cpus} """ stub: - """ touch ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz """ @@ -358,7 +354,7 @@ 
process octopus_tonly { process somaticcombine_tonly { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_medium' input: @@ -393,7 +389,7 @@ process somaticcombine_tonly { } process annotvep_tonly { - container = "${params.containers.vcf2maf}" + container "${params.containers.vcf2maf}" label 'process_medium' input: @@ -457,7 +453,7 @@ process annotvep_tonly { } process combinemafs_tonly { - container = "${params.containers.logan}" + container "${params.containers.logan}" label 'process_low' input: From c08e15697fab72f0760a620894ae5f1604da8fe3 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Fri, 12 Jan 2024 11:11:56 -0500 Subject: [PATCH 43/58] fix: increase mem for vardict --- conf/modules.config | 3 +-- modules/local/variant_calling.nf | 2 +- modules/local/variant_calling_tonly.nf | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index a9bb769..d171cef 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -15,7 +15,6 @@ process { ] } - withName: 'freec_paired' { publishDir = [ path: { "${params.outdir}/cnv/freec_paired" }, @@ -23,7 +22,7 @@ process { ] } - withName:'freec' { + withName: 'freec' { publishDir = [ path: { "${params.outdir}/cnv/freec_unpaired" }, mode: 'copy' diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index 2d01e01..ef25413 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ -330,7 +330,7 @@ process strelka_tn { process vardict_tn { container "${params.containers.logan}" - label 'process_highcpu' + label 'process_somaticcaller_high' input: tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf index fb4253e..7cc8cc7 100644 --- a/modules/local/variant_calling_tonly.nf +++ b/modules/local/variant_calling_tonly.nf @@ 
-278,8 +278,8 @@ process varscan_tonly { process vardict_tonly { container "${params.containers.logan}" + label 'process_somaticcaller_high' - label 'process_highcpu' input: tuple val(tumorname), path(tumor), path(tumorbai), path(bed) From dfce46994d773b8b383b26dea881dd4f0d078819 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Fri, 12 Jan 2024 15:46:41 -0500 Subject: [PATCH 44/58] style: spacing --- modules/local/variant_calling_tonly.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf index 7cc8cc7..c063f2a 100644 --- a/modules/local/variant_calling_tonly.nf +++ b/modules/local/variant_calling_tonly.nf @@ -185,7 +185,6 @@ process mutect2_t_tonly { process mutect2filter_tonly { container "${params.containers.logan}" - label 'process_medium' input: From 92fb74a65232adf77b17d6e0bb2bb0ee4ff4d1da Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Mon, 15 Jan 2024 11:31:01 -0500 Subject: [PATCH 45/58] feat: revert to GATK3 --- conf/containers.config | 2 +- modules/local/variant_calling.nf | 71 +++++++++++++++++++++++++- modules/local/variant_calling_tonly.nf | 66 +++++++++++++++++++++++- subworkflows/local/workflows.nf | 12 +++-- subworkflows/local/workflows_tonly.nf | 8 +-- 5 files changed, 149 insertions(+), 10 deletions(-) diff --git a/conf/containers.config b/conf/containers.config index 2ceaf3c..c929fec 100644 --- a/conf/containers.config +++ b/conf/containers.config @@ -2,7 +2,7 @@ params { containers { base = 'nciccbr/ccbr_ubuntu_base_20.04:v6.1' - logan = 'docker://dnousome/ccbr_logan_base:v0.3.3' + logan = 'docker://dnousome/ccbr_logan_base:v0.3.4' vcf2maf = 'docker://dnousome/ccbr_vcf2maf:v102.0.0' octopus = 'docker://dancooke/octopus:latest' diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index ef25413..03b17b7 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ -221,6 +221,32 @@ process mergemut2stats { } 
+process octopus_convertvcf { + container "${params.containers.logan}" + label 'process_low' + + input: + tuple val(tumor), val(normal), + val(oct), path(vcf), path(vcfindex) + + output: + tuple val(tumor), val(normal), path("${tumor}.octopus.norm.vcf.gz"), + path("${tumor}.octopus.norm.vcf.gz.tbi") + + + script: + """ + zcat ${vcf} | sed 's/^##fileformat=VCFv4.3/##fileformat=VCFv4.2/' > ${tumor}_temp.octopus.norm.vcf + bgzip ${tumor}_temp.octopus.norm.vcf + mv ${tumor}_temp.octopus.norm.vcf.gz ${tumor}.octopus.norm.vcf.gz + bcftools index -t ${tumor}.octopus.norm.vcf.gz -f + """ + + stub: + """ + touch ${tumor}.octopus.norm.vcf.gz ${tumor}.octopus.norm.vcf.gz.tbi + """ +} process mutect2filter { container "${params.containers.logan}" @@ -731,6 +757,49 @@ process combineVariants_strelka { } +process somaticcombine { + container "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(tumorsample), val(normal), + val(callers), + path(vcfs), path(vcfindex) + + output: + tuple val(tumorsample), val(normal), + path("${tumorsample}_vs_${normal}_combined.vcf.gz"), + path("${tumorsample}_vs_${normal}_combined.vcf.gz.tbi") + + script: + vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } + vcfin2="-V:" + vcfin1.join(" -V:") + + """ + /usr/lib/jvm/java-8-openjdk-amd64/bin/java -jar \$GATK_JAR -T CombineVariants \ + -R $GENOMEREF \ + --genotypemergeoption PRIORITIZE \ + --rod_priority_list mutect2,strelka,octopus,muse,lofreq,vardict,varscan \ + --filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED \ + -o ${tumorsample}_vs_${normal}_combined.vcf.gz \ + $vcfin2 + + """ + + stub: + vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } + vcfin2="-V:" + vcfin1.join(" -V:") + + """ + touch ${tumorsample}_vs_${normal}_combined.vcf.gz + touch ${tumorsample}_vs_${normal}_combined.vcf.gz.tbi + """ + +} + + + +/*DISCVR process somaticcombine { container "${params.containers.logan}" label 'process_medium' @@ -769,7 +838,7 @@ process 
somaticcombine { """ } - +*/ process annotvep_tn { label 'process_medium' diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf index c063f2a..3ad791c 100644 --- a/modules/local/variant_calling_tonly.nf +++ b/modules/local/variant_calling_tonly.nf @@ -352,6 +352,70 @@ process octopus_tonly { +process octopus_convertvcf_tonly { + container "${params.containers.logan}" + label 'process_low' + + input: + tuple val(tumor), val(oct), path(vcf), path(vcfindex) + + output: + tuple val(tumor), path("${tumor}.octopus_tonly.norm.vcf.gz"), + path("${tumor}.octopus_tonly.norm.vcf.gz.tbi") + + + script: + """ + zcat ${vcf} | sed 's/^##fileformat=VCFv4.3/##fileformat=VCFv4.2/' > ${tumor}_temp.octopus_tonly.norm.vcf + bgzip ${tumor}_temp.octopus_tonly.norm.vcf + mv ${tumor}_temp.octopus_tonly.norm.vcf.gz ${tumor}.octopus_tonly.norm.vcf.gz + bcftools index -t ${tumor}.octopus_tonly.norm.vcf.gz -f + """ + + stub: + """ + touch ${tumor}.octopus_tonly.norm.vcf.gz ${tumor}.octopus_tonly.norm.vcf.gz.tbi + """ +} + + +process somaticcombine_tonly { + container "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(tumorsample), + val(callers), + path(vcfs), path(vcfindex) + + output: + tuple val(tumorsample), + path("${tumorsample}_combined_tonly.vcf.gz"), + path("${tumorsample}_combined_tonly.vcf.gz.tbi") + + script: + vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } + vcfin2="-V:" + vcfin1.join(" -V:") + + """ + /usr/lib/jvm/java-8-openjdk-amd64/bin/java -jar \$GATK_JAR -T CombineVariants \ + -R $GENOMEREF \ + --genotypemergeoption PRIORITIZE \ + --rod_priority_list mutect2_tonly,octopus_tonly,vardict_tonly,varscan_tonly \ + --filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED \ + -o ${tumorsample}_combined_tonly.vcf.gz \ + $vcfin2 + """ + + stub: + """ + touch ${tumorsample}_combined_tonly.vcf.gz ${tumorsample}_combined_tonly.vcf.gz.tbi + """ + +} + + +/*DISCVRSeq process somaticcombine_tonly { container 
"${params.containers.logan}" label 'process_medium' @@ -386,7 +450,7 @@ process somaticcombine_tonly { """ } - +*/ process annotvep_tonly { container "${params.containers.vcf2maf}" label 'process_medium' diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index 766a41a..d7fdcfe 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -19,7 +19,7 @@ include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n; contamination_paired; learnreadorientationmodel;mergemut2stats; strelka_tn; combineVariants_strelka; varscan_tn; vardict_tn; lofreq_tn; muse_tn; - octopus_tn; bcftools_index_octopus; bcftools_index_octopus as bcftools_index_octopus_tonly; + octopus_tn; bcftools_index_octopus; bcftools_index_octopus as bcftools_index_octopus_tonly; octopus_convertvcf; combineVariants as combineVariants_vardict; combineVariants as combineVariants_vardict_tonly; combineVariants as combineVariants_varscan; combineVariants as combineVariants_varscan_tonly; combineVariants_alternative as combineVariants_lofreq; combineVariants as combineVariants_muse; @@ -34,7 +34,7 @@ include {mutect2_t_tonly; mutect2filter_tonly; varscan_tonly; vardict_tonly; octopus_tonly; contamination_tumoronly; learnreadorientationmodel_tonly; - mergemut2stats_tonly; + mergemut2stats_tonly; octopus_convertvcf_tonly; annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus; annotvep_tonly as annotvep_tonly_combined; @@ -314,6 +314,8 @@ workflow VC { | map{samplename,marked,markedindex,normvcf,normindex -> tuple(samplename.split('_vs_')[0],samplename.split('_vs_')[1],"octopus",normvcf,normindex)} annotvep_tn_octopus(octopus_in) + octopus_in_sc = octopus_in | octopus_convertvcf + | map{tumor,vcf,vcfindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} //Octopus TOnly octopus_in_tonly=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed-> @@ 
-324,15 +326,17 @@ workflow VC { | join(sample_sheet) | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"octopus_tonly",normvcf,normindex)} annotvep_tonly_octopus(octopus_in_tonly) + octopus_in_tonly_sc=octopus_in_tonly | octopus_convertvcf_tonly + | map{tumor,vcf,vcfindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} //Combine All Variants Using VCF and Then Reannotate - mutect2_in|concat(strelka_in)|concat(octopus_in)|concat(muse_in)|concat(lofreq_in) + mutect2_in|concat(strelka_in)|concat(octopus_in_sc)|concat(muse_in)|concat(lofreq_in) | concat(vardict_in) |concat(varscan_in) | groupTuple(by:[0,1]) | somaticcombine | map{tumor,normal,vcf,index ->tuple(tumor,normal,"combined",vcf,index)} | annotvep_tn_combined - mutect2_in_tonly|concat(octopus_in_tonly) + mutect2_in_tonly|concat(octopus_in_tonly_sc) | concat(vardict_in_tonly)|concat(varscan_in_tonly) | groupTuple() | somaticcombine_tonly | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf index 9f19e1a..9673e9c 100644 --- a/subworkflows/local/workflows_tonly.nf +++ b/subworkflows/local/workflows_tonly.nf @@ -32,7 +32,7 @@ include {mutect2_t_tonly; mutect2filter_tonly; pileup_paired_tonly; octopus_tonly; contamination_tumoronly; learnreadorientationmodel_tonly; - mergemut2stats_tonly; + mergemut2stats_tonly; octopus_convertvcf_tonly; annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus; annotvep_tonly as annotvep_tonly_combined; @@ -196,9 +196,11 @@ workflow VC_TONLY { | combineVariants_alternative | join(sample_sheet) | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} annotvep_tonly_octopus(octopus_in_tonly) + octopus_in_tonly_sc=octopus_in_tonly | octopus_convertvcf_tonly + | map{tumor,normvcf,normindex 
->tuple(tumor,"octopus_tonly",normvcf,normindex)} - - mutect2_tonly_in | concat(octopus_in_tonly) + //Combined Variants and Annotated + mutect2_tonly_in | concat(octopus_in_tonly_sc) | concat(vardict_in_tonly) | concat(varscan_in_tonly) | groupTuple() | somaticcombine_tonly From 37ecadf6c979b9441adea1fe0e94ed3b56d9739a Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 23 Jan 2024 11:15:01 -0500 Subject: [PATCH 46/58] fix: vcf input order --- subworkflows/local/workflows.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index d7fdcfe..883f4ca 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -314,8 +314,8 @@ workflow VC { | map{samplename,marked,markedindex,normvcf,normindex -> tuple(samplename.split('_vs_')[0],samplename.split('_vs_')[1],"octopus",normvcf,normindex)} annotvep_tn_octopus(octopus_in) - octopus_in_sc = octopus_in | octopus_convertvcf - | map{tumor,vcf,vcfindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} + octopus_in_sc = octopus_in | octopus_convertvcf + | map{tumor,normal,vcf,vcfindex ->tuple(tumor,normal,"octopus",vcf,vcfindex)} //Octopus TOnly octopus_in_tonly=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed-> @@ -327,10 +327,10 @@ workflow VC { map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"octopus_tonly",normvcf,normindex)} annotvep_tonly_octopus(octopus_in_tonly) octopus_in_tonly_sc=octopus_in_tonly | octopus_convertvcf_tonly - | map{tumor,vcf,vcfindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} + | map{tumor,vcf,vcfindex ->tuple(tumor,"octopus_tonly",vcf,vcfindex)} //Combine All Variants Using VCF and Then Reannotate - mutect2_in|concat(strelka_in)|concat(octopus_in_sc)|concat(muse_in)|concat(lofreq_in) + mutect2_in|concat(strelka_in) | concat(octopus_in_sc) | concat(muse_in) | concat(lofreq_in) | concat(vardict_in) |concat(varscan_in) | groupTuple(by:[0,1]) | 
somaticcombine | map{tumor,normal,vcf,index ->tuple(tumor,normal,"combined",vcf,index)} From 5df8e6206d631767c712c419a786fe3b53e8283b Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 23 Jan 2024 16:43:39 -0500 Subject: [PATCH 47/58] fix: rename output --- subworkflows/local/workflows.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index 883f4ca..0b509ef 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -285,9 +285,9 @@ workflow VC { //VarScan TOnly varscan_in_tonly=bambyinterval.combine(contamination_paired.out) | map{tumor,bam,bai,normal,nbam,nbai,bed,tumorname2,tpile,npile,tumorc,normalc -> - tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly | groupTuple() - | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf/)[0][1].toInteger()},"varscan_tonly")} - | combineVariants_varscan_tonly + tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly | groupTuple + | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf.gz/)[0][1].toInteger()},"varscan_tonly")} + | combineVariants_varscan_tonly | join(sample_sheet) | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"varscan_tonly",normvcf,normindex)} annotvep_tonly_varscan(varscan_in_tonly) From d01739fa33efbb24900afc26476822e54ddabdb1 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 24 Jan 2024 16:49:34 -0500 Subject: [PATCH 48/58] fix: fixed combine mode --- modules/local/variant_calling.nf | 1 - subworkflows/local/workflows.nf | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index 03b17b7..e0b44c7 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ -411,7 +411,6 @@ process varscan_tn { input: tuple val(tumorname), path(tumor), path(tumorbai), 
val(normalname), path(normal), path(normalbai), path(bed), - val(tumor1), path(tumorpileup), path(normalpileup), path(tumor_con_table), path(normal_con_table) diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index 0b509ef..6c4f201 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -275,18 +275,19 @@ workflow VC { annotvep_tonly_vardict(vardict_in_tonly) //VarScan TN - varscan_in=bambyinterval.combine(contamination_paired.out) - | varscan_tn | groupTuple(by:[0,1]) + varscan_in=bambyinterval.combine(contamination_paired.out,by:0) + | varscan_tn | groupTuple(by:[0,1]) | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).varscan.vcf.gz/)[0][1].toInteger()},"varscan")} | combineVariants_varscan | join(sample_sheet_paired) | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"varscan",normvcf,normindex)} annotvep_tn_varscan(varscan_in) + //VarScan TOnly - varscan_in_tonly=bambyinterval.combine(contamination_paired.out) - | map{tumor,bam,bai,normal,nbam,nbai,bed,tumorname2,tpile,npile,tumorc,normalc -> + varscan_in_tonly=bambyinterval.combine(contamination_paired.out,by:0) + | map{tumor,bam,bai,normal,nbam,nbai,bed,tpile,npile,tumorc,normalc -> tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly | groupTuple - | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf.gz/)[0][1].toInteger()},"varscan_tonly")} + | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf.gz/)[0][1].toInteger()},"varscan_tonly")} | combineVariants_varscan_tonly | join(sample_sheet) | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"varscan_tonly",normvcf,normindex)} @@ -347,7 +348,6 @@ workflow VC { emit: somaticcall_input=octopus_in - } From 220ebb283e75c58dfa641b8e11765b98c7bb58ac Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Mon, 12 
Feb 2024 19:59:48 -0500 Subject: [PATCH 49/58] feat: added lofreq separate container for htslib update --- conf/containers.config | 2 +- modules/local/variant_calling.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/containers.config b/conf/containers.config index c929fec..c47abbb 100644 --- a/conf/containers.config +++ b/conf/containers.config @@ -4,7 +4,7 @@ params { base = 'nciccbr/ccbr_ubuntu_base_20.04:v6.1' logan = 'docker://dnousome/ccbr_logan_base:v0.3.4' vcf2maf = 'docker://dnousome/ccbr_vcf2maf:v102.0.0' + lofreq = 'docker://dnousome/ccbr_lofreq:v0.0.1' octopus = 'docker://dancooke/octopus:latest' - } } diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index e0b44c7..952cf95 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ -488,7 +488,7 @@ process octopus_tn { process lofreq_tn { - container "${params.containers.logan}" + container "${params.containers.lofreq}" label 'process_somaticcaller' input: From 5efe9af9811cdd6bcb564dc7a4c4cae9ba2fe109 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 5 Mar 2024 14:09:05 -0500 Subject: [PATCH 50/58] fix: increase memory for vc --- conf/base.config | 4 ++-- modules/local/qc.nf | 27 ++++++++------------------- modules/local/trim_align.nf | 5 ----- 3 files changed, 10 insertions(+), 26 deletions(-) diff --git a/conf/base.config b/conf/base.config index 0eb1dee..0b5fd0c 100644 --- a/conf/base.config +++ b/conf/base.config @@ -58,8 +58,8 @@ process { } withLabel:process_somaticcaller_high { cpus = { check_max( 16 * task.attempt, 'cpus' ) } - memory = { check_max( 70.GB * task.attempt, 'memory' ) } - time = { check_max( 48.h * task.attempt, 'time' ) } + memory = { check_max( 96.GB * task.attempt, 'memory' ) } + time = { check_max( 72.h * task.attempt, 'time' ) } } withLabel:process_highmem { cpus = { check_max( 4 * task.attempt, 'cpus' ) } diff --git a/modules/local/qc.nf b/modules/local/qc.nf index f8bbc89..7732405 
100644 --- a/modules/local/qc.nf +++ b/modules/local/qc.nf @@ -146,18 +146,13 @@ process fastqc { @Output: FastQC report and zip file containing sequencing quality information """ - - input: tuple val(samplename), path("${samplename}.bqsr.bam"), path("${samplename}.bqsr.bai") output: tuple val(samplename), path("${samplename}_fastqc.html"), path("${samplename}_fastqc.zip") - //message: "Running FastQC with {threads} threads on '{input}' input file" - //threads: 8 - //module=['fastqc/0.11.9'] - script: + """ mkdir -p fastqc fastqc -t 8 \ @@ -404,14 +399,6 @@ process gatk_varianteval { tuple val(samplename), path("${samplename}.gvcf.gz") ,path("${samplename}.gvcf.gz.tbi") output: path("${samplename}.germline.eval.grp") - //params: - // rname = "vareval", - // genome = config['references']['GENOME'], - // dbsnp = config['references']['DBSNP'], - // ver_gatk = config['tools']['gatk4']['version'] - //message: "Running GATK4 VariantEval on '{input.vcf}' input file" - //container: config['images']['wes_base'] - //threads: 16 script: """ gatk --java-options '-Xmx12g -XX:ParallelGCThreads=16' VariantEval \ @@ -475,6 +462,12 @@ process somalier_extract { Mapped and pre-processed BAM file @Output: Exracted sites in (binary) somalier format + + params: + sites_vcf = config['references']['SOMALIER']['SITES_VCF'], + genomeFasta = config['references']['GENOME'], + rname = 'somalier_extract' + container: config['images']['wes_base'] */ label 'process_low' @@ -482,11 +475,7 @@ process somalier_extract { tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") output: path("output/${samplename}.somalier") - //params: - // sites_vcf = config['references']['SOMALIER']['SITES_VCF'], - // genomeFasta = config['references']['GENOME'], - // rname = 'somalier_extract' - //container: config['images']['wes_base'] + script: """ mkdir -p output diff --git a/modules/local/trim_align.nf b/modules/local/trim_align.nf index 1ceb063..bcab724 100644 --- 
a/modules/local/trim_align.nf +++ b/modules/local/trim_align.nf @@ -209,11 +209,6 @@ process bamtocram_tonly { /* process indelrealign { - //Briefly, RealignerTargetCreator runs faster with increasing -nt threads, - //while IndelRealigner shows diminishing returns for increasing scatter - - tag { name } - input: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") From 2caff97589c2e2c0195fd7d6067b52ee136df5fd Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 6 Mar 2024 10:09:41 -0500 Subject: [PATCH 51/58] fix: add the set column for vcf2maf --- modules/local/variant_calling.nf | 2 +- modules/local/variant_calling_tonly.nf | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index 952cf95..3b1386f 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ -460,7 +460,6 @@ process octopus_tn { tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) - output: tuple val("${tumorname}_vs_${normalname}"), path("${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz") @@ -893,6 +892,7 @@ process annotvep_tn { --vep-path /opt/vep/src/ensembl-vep \ --vep-data !{VEPCACHEDIR} \ --ncbi-build !{VEPBUILD} --species !{VEPSPECIES} --ref-fasta !{GENOMEREF} \ + --retain-info "set" \ --vep-overwrite ''' diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf index 3ad791c..e36622d 100644 --- a/modules/local/variant_calling_tonly.nf +++ b/modules/local/variant_calling_tonly.nf @@ -503,6 +503,7 @@ process annotvep_tonly { --vep-path /opt/vep/src/ensembl-vep \ --vep-data !{VEPCACHEDIR} \ --ncbi-build !{VEPBUILD} --species !{VEPSPECIES} --ref-fasta !{GENOMEREF} \ + --retain-info "set" \ --vep-overwrite From 98d7cb025a1cf0ff85abf208496a06cfe401851a Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 6 Mar 2024 10:18:02 -0500 Subject: [PATCH 52/58] 
fix: stub tests --- .github/workflows/tests.yaml | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index f6a9ea6..11e91d1 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -14,18 +14,22 @@ jobs: - uses: actions/checkout@v2 - uses: docker://nextflow/nextflow:22.10.8 - uses: actions/setup-python@v4 - with: - python-version: '3.9' - name: Tumor-normal FastQ Stub Run run: | docker run -v $PWD:/opt2 nextflow/nextflow:22.10.8 \ - python /opt2/wgs-seek --fastq "/opt2/.tests/*R{1,2}.fastq.gz" \ - --mode vc \ - --output /opt2/output_tn_fqs \ - --paired --sample_sheet "/opt2/.tests/pairs.tsv" --stub + nextflow run --fastq_input "/opt2/.tests/*R{1,2}.fastq.gz" \ + -profile ci_stub \ + --vc --cnv --sv \ + --genome hg38 \ + --outdir /opt2/output_tn_fqs \ + --sample_sheet "/opt2/.tests/pairs.tsv" \ + --stub - name: Tumor-only FastQ Stub Run run: | docker run -v $PWD:/opt2 nextflow/nextflow:22.10.8 \ - python /opt2/wgs-seek --fastq "/opt2/.tests/*R{1,2}.fastq.gz" \ - --mode vc \ - --output "/opt2/output_tonly_fqs" --stub \ No newline at end of file + nextflow run --fastq_input "/opt2/.tests/*R{1,2}.fastq.gz" \ + -profile ci_stub \ + --vc --cnv --sv \ + --genome hg38 \ + --outdir /opt2/output_tn_fqs \ + --stub From 0cac32fdb3c572e6185a08d0504f5e553790e7cb Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 6 Mar 2024 10:18:46 -0500 Subject: [PATCH 53/58] fix: action changes --- .github/workflows/tests.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 11e91d1..7cff41e 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -13,7 +13,6 @@ jobs: steps: - uses: actions/checkout@v2 - uses: docker://nextflow/nextflow:22.10.8 - - uses: actions/setup-python@v4 - name: Tumor-normal FastQ Stub Run run: | docker run -v $PWD:/opt2 nextflow/nextflow:22.10.8 \ From 
11894af7df58e4a7b47f82a5937c5d5193bed6b4 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 6 Mar 2024 10:34:05 -0500 Subject: [PATCH 54/58] docs: citation add --- CITATION.cff | 1 + 1 file changed, 1 insertion(+) diff --git a/CITATION.cff b/CITATION.cff index e52b1c9..817be73 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -3,6 +3,7 @@ message: "Please cite LOGAN as below." # TODO set up Zenodo to archive your tool authors: # TODO: author names should match those in pyproject.toml - family-names: Nousome given-names: Darryl + orcid: https://orcid.org/0000-0002-5259-8599 - family-names: Sovacool given-names: Kelly orcid: https://orcid.org/0000-0003-3283-829X From e1d1356e45abb0332d617f6435e488f722eaaed0 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 6 Mar 2024 10:53:07 -0500 Subject: [PATCH 55/58] fix: simplify testing --- .github/workflows/build.yml | 9 ++++++++- .github/workflows/tests.yaml | 34 ---------------------------------- .tests/interval.bed | 0 3 files changed, 8 insertions(+), 35 deletions(-) delete mode 100644 .github/workflows/tests.yaml create mode 100644 .tests/interval.bed diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e91bde8..e6e4582 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -36,4 +36,11 @@ jobs: mkdir tmp && cd tmp which logan logan init - logan run -profile ci_stub,docker -stub + logan run -profile ci_stub,docker \ + --fastq_input "/opt2/.tests/*R{1,2}.fastq.gz" \ + --vc --cnv --sv \ + --genome hg38 \ + --outdir /opt2/output_tn_fqs \ + --interval /opt2/.tests/interval.bed \ + -stub + diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml deleted file mode 100644 index 7cff41e..0000000 --- a/.github/workflows/tests.yaml +++ /dev/null @@ -1,34 +0,0 @@ -name: tests - -on: - push: - branches: - - main - pull_request: - branches_ignore: [] - -jobs: - Stub_Run: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: 
docker://nextflow/nextflow:22.10.8 - - name: Tumor-normal FastQ Stub Run - run: | - docker run -v $PWD:/opt2 nextflow/nextflow:22.10.8 \ - nextflow run --fastq_input "/opt2/.tests/*R{1,2}.fastq.gz" \ - -profile ci_stub \ - --vc --cnv --sv \ - --genome hg38 \ - --outdir /opt2/output_tn_fqs \ - --sample_sheet "/opt2/.tests/pairs.tsv" \ - --stub - - name: Tumor-only FastQ Stub Run - run: | - docker run -v $PWD:/opt2 nextflow/nextflow:22.10.8 \ - nextflow run --fastq_input "/opt2/.tests/*R{1,2}.fastq.gz" \ - -profile ci_stub \ - --vc --cnv --sv \ - --genome hg38 \ - --outdir /opt2/output_tn_fqs \ - --stub diff --git a/.tests/interval.bed b/.tests/interval.bed new file mode 100644 index 0000000..e69de29 From 862f1b6dd675113e94d12af8b4d3653f10c658bb Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 6 Mar 2024 12:13:37 -0500 Subject: [PATCH 56/58] docs: changelog --- CHANGELOG.md | 5 ++++- modules/local/copynumber.nf | 18 +++++++++--------- modules/local/qc.nf | 3 --- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f8ecdbb..4fa28d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ # LOGAN development version -- Python-based CLI \ No newline at end of file +- Changed over to Nextflow CCBR template and pip packaging + - Processes moved to `modules/local` directory + - Workflows under the `subworkflows/local` directory + - Processes fall under low/med/high, but adding a somaticvariant caller process diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf index 0f4a92b..ef47cc7 100644 --- a/modules/local/copynumber.nf +++ b/modules/local/copynumber.nf @@ -3,15 +3,15 @@ SEQUENZAGC=file(params.genomes[params.genome].SEQUENZAGC) SEQUENZA_SCRIPT=params.script_sequenza if (params.genome=="mm10"){ -FREECLENGTHS=file(params.genomes[params.genome].FREEC.FREECLENGTHS) -FREECCHROMS=file(params.genomes[params.genome].FREEC.FREECCHROMS) -FREECPILEUP=file(params.genomes[params.genome].FREEC.FREECPILEUP) 
-FREECSNPS = file(params.genomes[params.genome].FREEC.FREECSNPS) -FREECTARGETS=file(params.genomes[params.genome].intervals) -FREECSCRIPT = params.script_freec -FREECPAIR_SCRIPT = params.script_freecpaired -FREECSIGNIFICANCE = params.freec_significance -FREECPLOT = params.freec_plot + FREECLENGTHS=file(params.genomes[params.genome].FREEC.FREECLENGTHS) + FREECCHROMS=file(params.genomes[params.genome].FREEC.FREECCHROMS) + FREECPILEUP=file(params.genomes[params.genome].FREEC.FREECPILEUP) + FREECSNPS = file(params.genomes[params.genome].FREEC.FREECSNPS) + FREECTARGETS=file(params.genomes[params.genome].intervals) + FREECSCRIPT = params.script_freec + FREECPAIR_SCRIPT = params.script_freecpaired + FREECSIGNIFICANCE = params.freec_significance + FREECPLOT = params.freec_plot } GERMLINEHET="/data/SCLC-BRAINMETS/cn/copy_number/GermlineHetPon.38.vcf.gz" diff --git a/modules/local/qc.nf b/modules/local/qc.nf index 7732405..501fce6 100644 --- a/modules/local/qc.nf +++ b/modules/local/qc.nf @@ -18,7 +18,6 @@ SCRIPT_PATH_PCA = file(params.script_ancestry) //OUTPUT DIRECTORY process fc_lane { container = "${params.containers.logan}" - label 'process_low' input: @@ -46,8 +45,6 @@ process fc_lane { process fastq_screen { //Uses Trimmed Files - - input: tuple val(samplename), path("${samplename}.R1.trimmed.fastq.gz"), From 4c530157f6bf275722ae1450369f9431bde31147 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 6 Mar 2024 14:59:34 -0500 Subject: [PATCH 57/58] refactor: delete python script --- logan | 314 ---------------------------------------------------------- 1 file changed, 314 deletions(-) delete mode 100755 logan diff --git a/logan b/logan deleted file mode 100755 index 9a72893..0000000 --- a/logan +++ /dev/null @@ -1,314 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: UTF-8 -*- - -""" -ABOUT: This is the main entry for the LOGAN (whole genome sequencing pipeline). 
-REQUIRES: - - python>=3.5 - - nextflow - - singularity -DISCLAIMER: - PUBLIC DOMAIN NOTICE - CCR Collaborative Bioinformatics Resource (CCBR) - National Cancer Institute (NCI) -This software/database is a "United States Government Work" under -the terms of the United States Copyright Act. It was written as -part of the author's official duties as a United States Government -employee and thus cannot be copyrighted. This software is freely -available to the public for use. -Although all reasonable efforts have been taken to ensure the -accuracy and reliability of the software and data, CCBR do not and -cannot warrant the performance or results that may be obtained by -using this software or data. CCBR and NCI disclaim all warranties, -express or implied, including warranties of performance, -merchantability or fitness for any particular purpose. -Please cite the author and the "NIH Biowulf Cluster" in any work or -product based on this material. - - - PIPELINE TYPE - Align --PIPE_ALIGN-TRIM ALIGN - Variant Calls--PIPE_VC-Variant calling step after align - Germline Calls DV--PIPE_GERMLINE-Germline after align - QC requires Alignment, Germline--PIPE_QC--After everything - --PIPE_BAMVC-BAM variant calling only - --PIPE_TONLY_TRIM-Trim and Align - --PIPE_TONLY_TRIM-Trim and Align -""" - -# Python standard library -import argparse, os, time, sys, subprocess, re, json - - -def parse_args(): - parser = argparse.ArgumentParser(description="Input files") - parser.add_argument("--fastq", help="FQ Inputs") - parser.add_argument( - "--file_input", - help="TSV file of all fastq files used for input with 3 Columns Sample Name, Pair1, Pair2", - ) - parser.add_argument("--bam", help="Glob of all the BAM files []") - parser.add_argument("--sample_sheet", help="Sample sheet and required for Paired") - parser.add_argument("--splitregions", default=24, help="How splits per regions") - parser.add_argument("--vc", help="Add Somatic VC calling", action="store_true") - 
parser.add_argument("--cnv", help="Add CNV calling", action="store_true") - parser.add_argument( - "--sv", help="Add Structural Variant calling", action="store_true" - ) - parser.add_argument("--germline", help="Add Germline VC", action="store_true") - parser.add_argument( - "--qc", - help="Add QC Steps (Requires Germline Calling as well)", - action="store_true", - ) - parser.add_argument("--output", help="Output Directory") - parser.add_argument("--genome", help="hg38, mm10") - parser.add_argument("--profile", help="Biowulf or Local Run") - parser.add_argument( - "--resume", action="store_true", default="True", help="Resume previous run?" - ) - parser.add_argument("--submit", action="store_true", help="Submit to SLURM?") - parser.add_argument("--stub", action="store_true", help="Stub run") - args = parser.parse_args() - return args - - -def main(): - args = parse_args() - dirname = os.path.dirname(os.path.realpath(__file__)) - outdirname = os.path.basename(os.getcwd()) - c1 = "#!/usr/bin/bash" - c2 = "module load nextflow" - c3 = "module load singularity" - # Paired Mode-> either align/VC/SV/CNV/germline(QC as well) with FASTQ - if args.sample_sheet: - sample_path = "--sample_sheet '" + args.sample_sheet + "'" - ##Input Section - if args.fastq: - in1 = "--fastq_input '" + args.fastq + "'" - elif args.file_input: - in1 = "--file_input " + args.file_input - elif args.bam: - in1 = "--bam_input '" + args.bam + "'" - baminput = True - else: - print( - "Missing sample sheet for paired mode or you would like Tumor only mode?" 
- ) - alignmode = "--PIPE_ALIGN" - if args.vc and args.bam: - vcmode = "--PIPE_BAMVC" - elif args.vc: - vcmode = "--PIPE_VC" - if args.sv and args.bam: - svmode = "--PIPE_BAMSV" - elif args.sv: - svmode = "--PIPE_SV" - if args.cnv and args.bam: - cnvmode = "--PIPE_BAMCNV" - elif args.cnv: - cnvmode = "--PIPE_CNV" - if args.germline and args.bam: - germmode = "--PIPE_BAMGERMLINE" - elif args.germline: - germmode = "--PIPE_GERMLINE" - if args.qc and args.germline: - qcmode = "--PIPE_QC_GL" - elif args.qc: - qcmode = "--PIPE_QC_NOGL" - else: - ##SET DEFAULT for Tumor-Only Modes//Tumor Only Mode (No sample sheet) - alignmode = "--PIPE_TONLY_ALIGN" - qcmode = "--PIPE_TONLY_QC" - if ( - args.file_input and re.search(r".bam", open(args.file_input, "r").read()) - ) or args.bam: - baminput = True - sample_path = "" - if args.vc: - if args.fastq: - vcmode = "--PIPE_TONLY_VC" - in1 = "--fastq_input '" + args.fastq + "'" - elif args.bam: - vcmode = "--PIPE_TONLY_BAMVC" - in1 = "--bam_input '" + args.bam + "'" - elif args.file_input: - in1 = "--file_input " + args.file_input - bamin = re.search(r".bam", open(args.file_input, "r").read()) - if bamin: - vcmode = "--PIPE_TONLY_BAMVC" - else: - vcmode = "--PIPE_TONLY_VC" - if args.sv: - if args.fastq: - svmode = "--PIPE_TONLY_SV" - in1 = "--fastq_input '" + args.fastq + "'" - elif args.bam: - svmode = "--PIPE_TONLY_BAMSV" - in1 = "--bam_input '" + args.bam + "'" - elif args.file_input: - in1 = "--file_input " + args.file_input - bamin = re.search(r".bam", open(args.file_input, "r").read()) - if bamin: - svmode = "--PIPE_TONLY_BAMSV" - else: - svmode = "--PIPE_TONLY_SV" - if args.cnv: - if args.fastq: - cnvmode = "--PIPE_TONLY_CNV" - in1 = "--fastq_input '" + args.fastq + "'" - elif args.bam: - cnvmode = "--PIPE_TONLY_BAMCNV" - in1 = "--bam_input '" + args.bam + "'" - elif args.file_input: - in1 = "--file_input " + args.file_input - bamin = re.search(r".bam", open(args.file_input, "r").read()) - if bamin: - cnvmode = 
"--PIPE_TONLY_BAMCNV" - else: - cnvmode = "--PIPE_TONLY_CNV" - if args.qc: - if args.fastq: - in1 = "--fastq_input '" + args.fastq + "'" - elif args.file_input: - in1 = "--file_input " + args.file_input - if args.stub and args.profile is None: - profile = "-profile localstub" - splitreg = "4" - elif args.profile == "local": - profile = "-profile local" - splitreg = str(args.splitregions) - elif args.profile == "biowulf" or args.profile is None: - profile = "-profile biowulf" - splitreg = str(args.splitregions) - if args.resume: - resume = "-resume" - else: - resume = "" - ###COMBINE ALL COMMANDS (PIPE ALIGN) - commandbase = [ - "nextflow run", - dirname + "/main.nf", - "-c " + dirname + "/nextflow.config", - in1, - profile, - resume, - sample_path, - "--genome", - args.genome, - "--output '" + args.output + "'" + " --split_regions " + splitreg, - ] - ##FINAL COMMANDS - if not "baminput" in locals(): - commandalign = commandbase + [alignmode] - cmd1 = " ".join(commandalign) - else: - cmd1 = "" - if args.vc: - commandvc = commandbase + [vcmode] - cmd2 = " ".join(commandvc) - else: - cmd2 = "" - if args.sv: - commandsv = commandbase + [svmode] - cmd3 = " ".join(commandsv) - else: - cmd3 = "" - if args.cnv: - commandcnv = commandbase + [cnvmode] - cmd4 = " ".join(commandcnv) - else: - cmd4 = "" - if args.germline: - commandgl = commandbase + [germmode] - cmd5 = " ".join(commandgl) - else: - cmd5 = "" - if args.qc: - commandqc = commandbase + [qcmode] - cmd6 = " ".join(commandqc) - else: - cmd6 = "" - code = ( - c1 - + "\n" - + c2 - + "\n" - + c3 - + "\n" - + cmd1 - + "\n" - + cmd2 - + "\n" - + cmd3 - + "\n" - + cmd4 - + "\n" - + cmd5 - + "\n" - + cmd6 - ) - time1 = time.strftime("%Y_%m_%d_%H%M") - stubbase = " -stub -without-podman T -without-conda -without-docker" - if args.stub: - if not "baminput" in locals(): - cmd1_stub = cmd1 + stubbase - else: - cmd1_stub = "" - if args.vc: - cmd2_stub = cmd2 + stubbase - else: - cmd2_stub = "" - if args.sv: - cmd3_stub = cmd3 + 
stubbase - else: - cmd3_stub = "" - if args.cnv: - cmd4_stub = cmd4 + stubbase - else: - cmd4_stub = "" - if args.germline: - cmd5_stub = cmd5 + stubbase - else: - cmd5_stub = "" - if args.qc: - cmd6_stub = cmd6 + stubbase - else: - cmd6_stub = "" - cmd_stub = ( - cmd1_stub - + "\n" - + cmd2_stub - + "\n" - + cmd3_stub - + "\n" - + cmd4_stub - + "\n" - + cmd5_stub - + "\n" - + cmd6_stub - ) - print(cmd_stub) - os.system(cmd_stub) - else: - outswarmmut = args.output + "_" + time1 + ".slurm" - with open(outswarmmut, "a") as outfile: - outfile.write(code + "\n") - sbatch_mut = ( - "sbatch --cpus-per-task=2 --mem=8g --time 10-00:00:00 --partition norm --output submit_" - + time1 - + ".log --error error_" - + time1 - + ".log --mail-type=BEGIN,END " - + outswarmmut - ) - sbatch_out = "kickoff_" + time1 + ".sh" - with open(sbatch_out, "a") as outfile: - outfile.write(sbatch_mut + "\n") - print(sbatch_mut) - if args.submit: - os.system(sbatch_mut) - - -if __name__ == "__main__": - main() From 18a471a4778ee9d73f673a29db326de3d25230ba Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Thu, 7 Mar 2024 10:34:06 -0500 Subject: [PATCH 58/58] fix: change indel reference --- conf/genomes.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/genomes.config b/conf/genomes.config index 3d0843a..fde6bcc 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -9,10 +9,10 @@ params { intervals= "${projectDir}/assets/hg38_v0_wgs_calling_regions.hg38.bed" //millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //shapeitindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) // - KNOWNINDELS= 
'/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_indel.vcf' + KNOWNINDELS= '/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz' KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz' dbsnp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz" - dbsnp_indel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_indel.vcf" + dbsnp_indel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' // /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz pon = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PON/updatedpon.vcf.gz" //pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon} kgp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/1000G_phase1.snps.high_confidence.hg38.vcf.gz"