Commit
Merge pull request #6 from CCBR/main
Additional changes to Tumor only calling
dnousome authored Oct 4, 2023
2 parents 722f391 + 169e57e commit fa37f4b
Showing 9 changed files with 124 additions and 35 deletions.
19 changes: 19 additions & 0 deletions Docker_hubmodules
@@ -0,0 +1,19 @@
## Compile list of modules
module=['vcf2maf/1.6.21','VEP/102']
module=['fastq_screen/0.15.2','bowtie/2-2.5.1']
module=['kraken/2.1.2', 'kronatools/2.8']
module=['fastqc/0.11.9']
module=['qualimap/2.2.1','java/12.0.1']
module=['samtools/1.16.1']
module=['vcftools/0.1.16']
module=['picard/2.20.8']
module=['bcftools/1.9']
module=['GATK/4.2.0.0']
module=["snpEff/4.3t"]
module=['multiqc/1.11']
module=['GATK/3.8-1']
module=['bwa-mem2/2.2.1','samblaster/0.1.26','samtools/1.15.1']
module=['fastp/0.23.2']
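These `module=` entries enumerate the HPC environment-module versions that the pipeline's containers are meant to replace. As a point of reference only, the same tool versions could be loaded directly on a cluster; a minimal sketch, assuming an Environment Modules/Lmod host (such as Biowulf) where these exact module names are installed:

```bash
# Minimal sketch: assumes an Environment Modules/Lmod setup where these
# module names exist (not part of the committed file).
module load samtools/1.16.1 bcftools/1.9 GATK/4.2.0.0
samtools --version | head -1   # confirm the loaded versions
bcftools --version | head -1
```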



19 changes: 16 additions & 3 deletions docker/logan_base/build.sh
@@ -1,11 +1,24 @@
# Build image
docker buildx create --platform linux/amd64 --use
docker buildx build -f Dockerfile -t dnousome/ccbr_logan_base:v0.3.0 -t dnousome/ccbr_logan_base:latest --platform=linux/amd64 --push .
#docker buildx create --platform linux/amd64 --use
#docker buildx use upbeat_ganguly
#docker buildx inspect upbeat_ganguly
#docker buildx build --platform linux/amd64 -f Dockerfile -t dnousome/ccbr_logan_base:v0.3.0 -t dnousome/ccbr_logan_base:latest --push .

docker build --platform linux/amd64 --tag ccbr_logan_base:v0.3.0 -f Dockerfile .
docker tag ccbr_logan_base:v0.3.0 dnousome/ccbr_logan_base:v0.3.0
docker tag ccbr_logan_base:v0.3.0 dnousome/ccbr_logan_base

docker push dnousome/ccbr_logan_base:v0.3.0
docker push dnousome/ccbr_logan_base:latest




# Tag image with version and reset latest
#docker tag ccbr_wgs_base:v0.1.0 nciccbr/ccbr_wgs_base:v0.1.0
#docker tag ccbr_wgs_base:v0.1.0 nciccbr/ccbr_wgs_base

# Push image to DockerHub
#docker push nciccbr/ccbr_wgs_base:v0.1.0
#docker push nciccbr/ccbr_wgs_base:latest
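After the `docker buildx build --push` above, the pushed tags can be checked without pulling the image; an optional verification sketch, assuming the tags were pushed as written:

```bash
# Optional sanity check (not part of build.sh): confirm both tags resolve
# for linux/amd64 and point at the same digest.
docker buildx imagetools inspect dnousome/ccbr_logan_base:v0.3.0
docker buildx imagetools inspect dnousome/ccbr_logan_base:latest
```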

34 changes: 27 additions & 7 deletions nextflow.config
@@ -15,6 +15,7 @@ params {
get_flowcell_lanes="${projectDir}/workflow/scripts/flowcell_lane.py"
intervals="${projectDir}/workflow/resources/hg38_v0_wgs_calling_regions.hg38.bed"
splitbed="${projectDir}/workflow/resources/split_Bed_into_equal_regions.py"
split_regions = "24" //Number of regions to split by
snpeff_genome = "GRCh38.86"
snpeff_config = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/snpEff/4.3t/snpEff.config"
snpeff_bundle = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/snpEff/4.3t/"
@@ -24,6 +25,7 @@ params {
script_combineSamples = "${projectDir}/workflow/scripts/RScripts/combineAllSampleCompareResults.R"
script_ancestry = "${projectDir}/workflow/scripts/RScripts/sampleCompareAncestoryPlots.R"
bwagenome= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.fasta"
vep_cache = "/fdb/VEP/102/cache"

//Biowulf
config_profile_description = 'Biowulf nf-core config'
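The two params introduced above, `split_regions` and `vep_cache`, behave like any other Nextflow param and can be overridden at launch. An illustrative invocation (the values shown are examples, not new defaults):

```bash
# Override the new params at launch time; 32 is illustrative, and the cache
# path simply repeats the configured default.
nextflow run wgs-seek.nf -c nextflow.config \
    --split_regions 32 \
    --vep_cache /fdb/VEP/102/cache
```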
@@ -106,6 +108,16 @@ profiles {
}
}

localstub {
process {
executor = 'local'

singularity {
enabled = false
}
}
}
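The `localstub` profile added here (replacing the empty stanza removed near the end of this file) runs processes on the local executor with Singularity disabled, which pairs naturally with Nextflow's `-stub` dry-run mode. A sketch, with the sample sheet and output paths hypothetical:

```bash
# Dry run with stub blocks and no containers; file paths are hypothetical.
nextflow run wgs-seek.nf -c nextflow.config \
    -profile localstub -stub \
    --sample_sheet samples.tsv --output results
```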

biowulf {
process {
executor = 'slurm'
@@ -140,15 +152,23 @@ profiles {
time=24.h
cpus=8
}
withName:fastp {
container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
memory=24.GB
time=24.h
cpus=4
}
withName:fc_lane {
container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
}
withName:bwamem2 {
container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
memory=200.GB
time=48.h
cpus=17
}
withName:indelrealign{
container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
memory=48.GB
time=72.h
cpus=16
@@ -180,6 +200,12 @@ profiles {
cpus= 4
time= 72.h
}
withName: 'vardict_tn|vardict_tonly|varscan_tn|varscan_tonly|combineVariants|combineVariants_strelka' {
container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
memory= 32.GB
cpus= 2
time= 72.h
}
withName: 'strelka_tn' {
container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
memory= 48.GB
@@ -202,21 +228,18 @@ profiles {
memory= 48.GB
time= 24.h
}

withName:'mergemut2stats|mergemut2stats_tonly' {
container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
memory= 16.GB
time= 24.h
}

withName:'pileup_paired_t|pileup_paired_n|pileup_paired_tonly' {
container= 'docker://dnousome/ccbr_logan_base:v0.3.0'
memory= 16.GB
time= 12.h
}

withName:'annotvep_tn|annotvep_tonly'{
//container= 'docker://dnousome/ccbr_vcf2maf:v102.0.0'
container= 'docker://dnousome/ccbr_vcf2maf:v102.0.0'
memory= 32.GB
time= 24.h
cpus=16
@@ -286,9 +309,6 @@ profiles {
}
}

localstub {

}

}

9 changes: 5 additions & 4 deletions wgs-seek
@@ -45,13 +45,14 @@ def parse_args():
parser.add_argument('--bam',help="Files input")
parser.add_argument('--mode',help='Mode?')
parser.add_argument('--paired',help='Paired',action="store_true")
parser.add_argument('--splitregions',default=24,help="Number of regions to split into")
parser.add_argument('--sv',help="Add structural variant calling",action="store_true")
parser.add_argument('--output',help="Output Directory")
parser.add_argument('--sample_sheet',help="Sample sheet")
parser.add_argument('--profile',help="Biowulf or Local Run")
parser.add_argument('--resume',action="store_true",default="True",help="Resume previous run?")
parser.add_argument('--submit',action="store_true",help="Submit to SLURM?",default="False")
parser.add_argument('--stub',action="store_true",help="Stub dry run",default="False")
parser.add_argument('--submit',action="store_true",help="Submit to SLURM?")
parser.add_argument('--stub',action="store_true",help="Stub run")
args = parser.parse_args()
return(args)

@@ -125,7 +126,7 @@ def main():
resume=""
c4=["nextflow run",dirname + '/wgs-seek.nf',"-c "+ dirname +"/nextflow.config",
in1,profile,resume,sample_path,mode,
"--output '" +args.output+"'"]
"--output '" +args.output+"'"+" --split_regions " +str(args.splitregions)]
cmd1=' '.join(c4)
code=c1+"\n"+c2+"\n"+c3+"\n"+cmd1
time1=time.strftime("%Y_%m_%d_%H%M%S")
@@ -135,7 +136,7 @@ def main():
outfile.write(code+"\n")
sbatch_mut="sbatch --cpus-per-task=2 --mem=16g --time 10-00:00:00 --partition norm --output submit_"+time1+".log --error error_"+time1+".log --mail-type=BEGIN,TIME_LIMIT_90,END "+outswarmmut
if args.stub:
cmd2=cmd1+" -stub -without-podman T -without-conda -without-docker"
cmd2=cmd1+" --split_regions 4 -stub -without-podman T -without-conda -without-docker"
print(cmd2)
os.system(cmd2)
elif args.submit:
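With these changes `--submit` and `--stub` become plain store-true flags (the contradictory string defaults are removed), and `--splitregions` is forwarded to Nextflow as `--split_regions`. A hedged sketch of a wrapper invocation, using only flags visible in this diff and with file names illustrative:

```bash
# Illustrative wrapper call: --splitregions is passed through as
# --split_regions; in a --stub run the wrapper then forces --split_regions 4.
./wgs-seek --mode paired --paired \
    --sample_sheet sample_sheet.tsv \
    --output results --profile biowulf \
    --splitregions 24 --stub
```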
8 changes: 5 additions & 3 deletions workflow/modules/splitbed.nf
@@ -1,3 +1,7 @@
SPLIT_BED=file(params.splitbed)
SPLIT_REGIONS=params.split_regions


// Split Bed Step to create the path
process splitinterval {
//Keep Process Local
@@ -12,11 +16,9 @@ process splitinterval {
path('bedout/*.bed')

script:

SPLIT_BED=file(params.splitbed)

"""
mkdir -p bedout
python ${SPLIT_BED} -infile ${BED_IN} -num 32 -out 'bedout/bed'
python $SPLIT_BED -infile ${BED_IN} -num ${SPLIT_REGIONS} -out 'bedout/bed'
"""
}
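With `-num` now driven by `params.split_regions` instead of the hard-coded 32, the splitter can also be exercised standalone. A sketch using the flags shown above, with the input BED path illustrative:

```bash
# Standalone run of the splitter at the new default of 24 regions.
mkdir -p bedout
python workflow/resources/split_Bed_into_equal_regions.py \
    -infile hg38_v0_wgs_calling_regions.hg38.bed \
    -num 24 -out 'bedout/bed'
```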
7 changes: 4 additions & 3 deletions workflow/modules/variant_calling.nf
@@ -7,6 +7,7 @@ KGP=file(params.kgp) //1000G_phase1.snps.high_confidence.hg38.vcf.gz"
DBSNP=file(params.dbsnp) //dbsnp_138.hg38.vcf.gz"
GNOMAD=file(params.gnomad) //somatic-hg38-af-only-gnomad.hg38.vcf.gz
PON=file(params.pon)
VEP_CACHEDIR=file(params.vep_cache)

//Output
outdir=file(params.output)
@@ -394,7 +395,7 @@ process combineVariants {
mkdir ${vc}
bcftools concat $vcfin -Oz -o ${sample}.${vc}.temp.vcf.gz
bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz
bcftools norm ${sample}.${vc}.marked.vcf.gz --threads $task.cpus --check-ref s -f $GENOME -O |\
bcftools norm ${sample}.${vc}.marked.vcf.gz --threads $task.cpus --check-ref s -f $GENOME -O v |\
awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
sed '/^\$/d' > ${sample}.${vc}.temp.vcf
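The one-character fix here is load-bearing: `bcftools norm -O` with nothing after it is rejected (the option requires a type letter), while `-O v` explicitly emits uncompressed VCF so the `awk`/`sed` text filters can operate on the stream. A standalone shell sketch of the fixed step, with file names illustrative; `\y` is GNU awk's word boundary, and the pipe characters inside the original character class are redundant:

```bash
# Standalone equivalent of the fixed step: -O v emits uncompressed VCF so
# IUPAC ambiguity codes in the REF column ($4) can be rewritten to N.
bcftools norm sample.vardict.marked.vcf.gz \
    --threads 2 --check-ref s -f Homo_sapiens_assembly38.fasta -O v \
  | awk '{gsub(/\y[WKYRSM]\y/,"N",$4); OFS="\t"; print}' \
  | sed '/^$/d' > sample.vardict.temp.vcf
```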
@@ -473,8 +474,8 @@ process annotvep_tn {
--output-maf !{vc}/!{tumorsample}.maf \
--tumor-id !{tumorsample} \
--normal-id !{normalsample} \
--vep-path ${VEP_HOME}/bin \
--vep-data ${VEP_CACHEDIR} \
--vep-path /opt/vep/src/ensembl-vep \
--vep-data $VEP_CACHEDIR \
--ncbi-build GRCh38 --species homo_sapiens --ref-fasta !{GENOME}
"""
8 changes: 3 additions & 5 deletions workflow/modules/variant_calling_tonly.nf
@@ -7,6 +7,7 @@ KGP=file(params.kgp) //1000G_phase1.snps.high_confidence.hg38.vcf.gz"
DBSNP=file(params.dbsnp) //dbsnp_138.hg38.vcf.gz"
GNOMAD=file(params.gnomad) //somatic-hg38-af-only-gnomad.hg38.vcf.gz
PON=file(params.pon)
VEP_CACHEDIR=file(params.vep_cache)

//Output
outdir=file(params.output)
@@ -317,8 +318,8 @@ process annotvep_tonly {
--vep-forks 16 --input-vcf !{tumorvcf}.vcf \
--output-maf !{vc}/!{tumorsample}.tonly.maf \
--tumor-id !{tumorsample} \
--vep-path ${VEP_HOME}/bin \
--vep-data ${VEP_CACHEDIR} \
--vep-path /opt/vep/src/ensembl-vep \
--vep-data $VEP_CACHEDIR \
--ncbi-build GRCh38 --species homo_sapiens --ref-fasta !{GENOME}
"""
@@ -330,9 +331,6 @@ process annotvep_tonly {
"""
}




process combinemafs_tonly {
publishDir(path: "${outdir}/mafs/tumor_only", mode: 'copy')

8 changes: 4 additions & 4 deletions workflow/modules/workflows.nf
@@ -240,28 +240,28 @@ workflow VARIANTCALL_PIPE {
.map{tumor,markedvcf,finalvcf,normal -> tuple(tumor,normal,"strelka",finalvcf)} | annotvep_tn_strelka

//Vardict
vardict_comb=vardict_tn(bambyinterval).map{tumor,vcf-> tuple(tumor,vcf,"vardict")} | combineVariants_vardict
vardict_comb=vardict_tn(bambyinterval).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict")} | combineVariants_vardict
vardict_comb.join(sample_sheet)
.map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"vardict",normvcf)} | annotvep_tn_vardict

//VarDict_tonly
vardict_tonly_comb=bambyinterval.map{tumorname,tumorbam,tumorbai,normname,normbam,normbai,bed ->
tuple(tumorname,tumorbam,tumorbai,bed)}
vardict_tonly(vardict_tonly_comb).map{tumor,vcf-> tuple(tumor,vcf,"vardict_tonly")} |combineVariants_vardict_tonly
vardict_tonly(vardict_tonly_comb).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict_tonly")} |combineVariants_vardict_tonly
combineVariants_vardict_tonly.out.join(sample_sheet)
.map{tumor,marked,normvcf,normal ->tuple(tumor,"vardict_tonly",normvcf)} | annotvep_tonly_vardict


//VarScan
varscan_in=bambyinterval.join(contamination_paired.out)
varscan_comb=varscan_tn(varscan_in).map{tumor,vcf-> tuple(tumor,vcf,"varscan")} | combineVariants_varscan
varscan_comb=varscan_tn(varscan_in).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"varscan")} | combineVariants_varscan
varscan_comb.join(sample_sheet)
.map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"varscan",normvcf)} | annotvep_tn_varscan

//VarScan_tonly
varscan_tonly_comb=varscan_in.map{tumor,bam,bai,normal,nbam,nbai,bed,tpile,npile,tumorc,normalc ->
tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly
varscan_tonly_comb1=varscan_tonly_comb.map{tumor,vcf-> tuple(tumor,vcf,"varscan_tonly")} | combineVariants_varscan_tonly
varscan_tonly_comb1=varscan_tonly_comb.groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"varscan_tonly")} | combineVariants_varscan_tonly

varscan_tonly_comb1.join(sample_sheet)
.map{tumor,marked,normvcf,normal ->tuple(tumor,"varscan_tonly",normvcf)} | annotvep_tonly_varscan
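The recurring edit in this hunk is `.groupTuple()` between each scattered caller and its `combineVariants*` step. Each caller emits one `(tumor, vcf)` pair per interval; `groupTuple` gathers those into `(tumor, [vcf1, vcf2, ...])` keyed by sample, so the combine step runs once per sample with every per-interval VCF, rather than once per interval. A shell sketch of the gather this enables downstream, with file names illustrative:

```bash
# What combineVariants can do once groupTuple delivers all interval VCFs
# for one sample in a single task (file names illustrative).
bcftools concat sample1.vardict.bed0.vcf.gz sample1.vardict.bed1.vcf.gz \
    -Oz -o sample1.vardict.temp.vcf.gz
bcftools sort sample1.vardict.temp.vcf.gz -Oz -o sample1.vardict.marked.vcf.gz
```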
47 changes: 41 additions & 6 deletions workflow/modules/workflows_tonly.nf
@@ -13,18 +13,32 @@ include {fastp; bwamem2;
bqsr; gatherbqsr; applybqsr; samtoolsindex} from './trim_align.nf'
include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n;
contamination_paired; learnreadorientationmodel;mergemut2stats;
} from './variant_calling.nf'
combineVariants as combineVariants_vardict; combineVariants as combineVariants_varscan;
combineVariants as combineVariants_vardict_tonly; combineVariants as combineVariants_varscan_tonly
annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict;
combinemafs_tn} from './variant_calling.nf'
include {mutect2_t_tonly; mutect2filter_tonly;
varscan_tonly; vardict_tonly;
contamination_tumoronly;
learnreadorientationmodel_tonly;
mergemut2stats_tonly;
annotvep_tonly} from './variant_calling_tonly.nf'
annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; annotvep_tonly as annotvep_tonly_mut2;
combinemafs_tonly} from './variant_calling_tonly.nf'
include {splitinterval} from "./splitbed.nf"



workflow INPUT_TONLY_PIPE {
fastqinput=Channel.fromFilePairs(params.fastq_input)
if(params.fastq_input){
fastqinput=Channel.fromFilePairs(params.fastq_input)

}else if(params.file_input) {
fastqinput=Channel.fromPath(params.file_input)
.splitCsv(header: false, sep: "\t", strip:true)
.map{ sample,fq1,fq2 ->
tuple(sample, tuple(file(fq1),file(fq2)))
}
}


if(params.sample_sheet){
@@ -35,11 +49,14 @@ workflow INPUT_TONLY_PIPE {
row.Tumor
)
}

}else{
sample_sheet=fastqinput.map{samplename,f1 -> tuple (
samplename)}
}



emit:
fastqinput
sample_sheet
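`INPUT_TONLY_PIPE` now accepts either a FASTQ-pair glob (`--fastq_input`) or a headerless tab-separated manifest (`--file_input`) whose rows are sample name, read 1, read 2, matching the `splitCsv` call above. A sketch of a conforming manifest, with paths and the launch flags illustrative:

```bash
# Headerless TSV consumed by splitCsv(sep: "\t"): sample, R1, R2.
printf 'sample1\tsample1_R1.fastq.gz\tsample1_R2.fastq.gz\n'  > file_input.tsv
printf 'sample2\tsample2_R1.fastq.gz\tsample2_R2.fastq.gz\n' >> file_input.tsv
nextflow run wgs-seek.nf -c nextflow.config --file_input file_input.tsv
```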
@@ -142,11 +159,29 @@ workflow VARIANT_TONLY_PIPE {

mutect2filter_tonly(mut2tonly_filter)

//#To implement
//CNMOPs from the BAM BQSRs

//##VCF2MAF TO

annotvep_tonly(mutect2filter_tonly.out)
mutect2filter_tonly.out
.join(sample_sheet)
.map{tumor,markedvcf,finalvcf,stats -> tuple(tumor,"mutect2",finalvcf)} | annotvep_tonly_mut2

//VarDict_tonly
vardict_tonly_comb=bambyinterval.map{tumorname,tumorbam,tumorbai,normname,normbam,normbai,bed ->
tuple(tumorname,tumorbam,tumorbai,bed)}
vardict_tonly(vardict_tonly_comb).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict_tonly")} |combineVariants_vardict_tonly
combineVariants_vardict_tonly.out.join(sample_sheet)
.map{tumor,marked,normvcf ->tuple(tumor,"vardict_tonly",normvcf)} | annotvep_tonly_vardict

//VarScan_tonly
varscan_tonly_comb=varscan_in.map{tumor,bam,bai,normal,nbam,nbai,bed,tpile,npile,tumorc,normalc ->
tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly
varscan_tonly_comb1=varscan_tonly_comb.groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"varscan_tonly")} | combineVariants_varscan_tonly

varscan_tonly_comb1.join(sample_sheet)
.map{tumor,marked,normvcf,normal ->tuple(tumor,"varscan_tonly",normvcf)} | annotvep_tonly_varscan

annotvep_tonly_mut2.out.concat(annotvep_tonly_vardict.out).concat(annotvep_tonly_varscan.out) | combinemafs_tonly

}

