Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat indelrealign #39

Merged
merged 12 commits into from
Apr 17, 2024
4 changes: 3 additions & 1 deletion conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ process {
time = { check_max( 16.h * task.attempt, 'time' ) }
}
withLabel:process_long {
    // Resource requests for long-running tasks. Each value is scaled by
    // task.attempt and then passed through check_max().
    // The superseded 20.h time assignment is dropped: the later 72.h
    // assignment already overrode it, so the effective settings are unchanged.
    cpus   = { check_max( 4 * task.attempt, 'cpus' ) }
    memory = { check_max( 16.GB * task.attempt, 'memory' ) }
    time   = { check_max( 72.h * task.attempt, 'time' ) }
}
withLabel:process_high_memory {
memory = { check_max( 200.GB * task.attempt, 'memory' ) }
Expand Down
13 changes: 8 additions & 5 deletions conf/genomes.config
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ params {
genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict"
wgsregion = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list"
intervals= "${projectDir}/assets/hg38_v0_wgs_calling_regions.hg38.bed"
//millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
//shapeitindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) //
KNOWNINDELS= '/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz'
millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
shapeitindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) //
INDELREF = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz"
KNOWNINDELS = "-known /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz -known /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz"
KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz'
dbsnp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz"
dbsnp_indel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' // /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz
pon = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PON/updatedpon.vcf.gz" //pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon}
kgp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/1000G_phase1.snps.high_confidence.hg38.vcf.gz"
Expand All @@ -25,6 +25,7 @@ params {
vepcache = "/fdb/VEP/102/cache"
vepspecies = "homo_sapiens"
vepbuild = "GRCh38"
annotsvgenome = "GRCh38"
octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest"
octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest"
SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz"
Expand All @@ -37,7 +38,8 @@ params {
bwagenome= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwaindex/genome.fa"
genomedict= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.dict"
intervals="/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/mm10_wgsregions.bed"
KNOWNINDELS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz"
KNOWNINDELS = "-known /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz"
INDELREF = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz"
KNOWNRECAL = "-known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz -known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_snps.vcf.gz"
dbsnp = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_allstrains_dbSNP142.vcf.gz"
pon = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_dbSNP_allStrains_compSet_noIND.vcf.gz"
Expand All @@ -53,6 +55,7 @@ params {
vepcache = "/fdb/VEP/102/cache"
vepspecies = "mus_musculus"
vepbuild= "GRCm38"
annotsvgenome = "mm10"
octopus_sforest = ""
octopus_gforest = ""
SEQUENZAGC = '/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/SEQUENZA/mm10.gc50Base.wig.gz'
Expand Down
2 changes: 0 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ include {INPUT_TONLY; INPUT_TONLY_BAM;
VC_TONLY; SV_TONLY; CNVhuman_tonly; CNVmouse_tonly; QC_TONLY } from "./subworkflows/local/workflows_tonly.nf"



//SUB WORKFLOWS to SPLIT
workflow.onComplete {
if (!workflow.stubRun && !workflow.commandLine.contains('-preview')) {
def message = Utils.spooker(workflow)
Expand Down
11 changes: 5 additions & 6 deletions modules/local/copynumber.nf
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ ENSEMBLCACHE='/data/SCLC-BRAINMETS/cn/common/ensembl_data'
DRIVERS='/data/SCLC-BRAINMETS/cn/common/DriverGenePanel.38.tsv'
HOTSPOTS='/data/SCLC-BRAINMETS/cn/variants/KnownHotspots.somatic.38.vcf.gz'

//DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS)
//ascatR=


Expand Down Expand Up @@ -273,7 +272,7 @@ process amber_tonly {

"""

java -Xmx32G -cp amber.jar com.hartwig.hmftools.amber.AmberApplication \
java -Xmx32G -cp /opt2/hmftools/amber.jar com.hartwig.hmftools.amber.AmberApplication \
-tumor ${tumorname} -tumor_bam ${tumor} \
-output_dir ${tumorname}_amber \
-threads $task.cpus \
Expand Down Expand Up @@ -310,7 +309,7 @@ process amber_tn {

"""

java -Xmx32G -cp amber.jar com.hartwig.hmftools.amber.AmberApplication \
java -Xmx32G -cp /opt2/hmftools/amber.jar com.hartwig.hmftools.amber.AmberApplication \
-tumor ${tumorname} -tumor_bam ${tumor} \
-reference ${normalname} -reference_bam ${normal} \
-output_dir ${tumorname}_vs_${normalname}_amber \
Expand Down Expand Up @@ -346,7 +345,7 @@ process cobalt_tonly {

"""

java -jar -Xmx8G cobalt.jar \
java -jar -Xmx8G /opt2/hmftools/cobalt.jar \
-tumor ${tumorname} -tumor_bam ${tumor} \
-output_dir ${tumorname}_cobalt \
-threads $task.cpus \
Expand Down Expand Up @@ -382,7 +381,7 @@ process cobalt_tn {

"""

java -jar -Xmx8G cobalt.jar \
java -jar -Xmx8G /opt2/hmftools/cobalt.jar \
-tumor ${tumorname} -tumor_bam ${tumorname} \
-reference ${normalname} -reference_bam ${normal} \
-output_dir ${tumorname}_vs_${normalname}_cobalt \
Expand Down Expand Up @@ -418,7 +417,7 @@ process purple {
script:

"""
java -jar purple.jar \
java -jar /opt2/hmftools/purple.jar \
-tumor ${tumorname} \
-amber ${amberin} \
-cobalt ${cobaltin} \
Expand Down
20 changes: 11 additions & 9 deletions modules/local/structural_variant.nf
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
// Module-level settings looked up from the params.genomes entry selected by params.genome.
GENOMEREF=file(params.genomes[params.genome].genome)
GENOME=params.genome
// annotsvgenome is a build name ("GRCh38" / "mm10"), not a file. It is passed
// verbatim to `AnnotSV -genomeBuild`, so it must stay a plain string; wrapping
// it in file() would turn it into a path.
ANNOTSVGENOME=params.genomes[params.genome].annotsvgenome
BWAGENOME=file(params.genomes[params.genome].bwagenome)
DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS)
// Indel VCF handed to `svaba run -D`.
INDELREF=file(params.genomes[params.genome].INDELREF)



process svaba_somatic {
container = "${params.containers.logan}"
label 'process_highcpu'

input:
Expand All @@ -30,7 +31,7 @@ process svaba_somatic {

script:
"""
svaba run -t ${tumor} -n ${normal} -p $task.cpus -D $DBSNP_INDEL -a ${tumor.simpleName} -G $BWAGENOME
svaba run -t ${tumor} -n ${normal} -p $task.cpus -D $INDELREF -a ${tumor.simpleName} -G $BWAGENOME
"""

stub:
Expand All @@ -56,7 +57,7 @@ process svaba_somatic {


process manta_somatic {

container = "${params.containers.logan}"
label 'process_highcpu'

input:
Expand Down Expand Up @@ -102,7 +103,6 @@ process manta_somatic {
process annotsv_tn {
//AnnotSV for Manta/Svaba works with either vcf.gz or .vcf files
//Requires bedtools,bcftools

module = ['annotsv/3.3.1']

input:
Expand All @@ -119,7 +119,7 @@ process annotsv_tn {
mkdir ${sv}

AnnotSV -SVinputFile ${somaticvcf} \
-genomeBuild $GENOME \
-genomeBuild $ANNOTSVGENOME \
-SVinputInfo 1 -outputFile ${tumorname} \
-outputDir ${sv}

Expand All @@ -136,6 +136,7 @@ process annotsv_tn {


process manta_tonly {
container = "${params.containers.logan}"
label 'process_highcpu'

input:
Expand Down Expand Up @@ -178,6 +179,7 @@ process manta_tonly {


process svaba_tonly {
container = "${params.containers.logan}"
label 'process_highcpu'

input:
Expand All @@ -198,7 +200,7 @@ process svaba_tonly {

script:
"""
svaba run -t ${tumor} -p $task.cpus -D $DBSNP_INDEL -a ${tumor.simpleName} -G $BWAGENOME
svaba run -t ${tumor} -p $task.cpus -D $INDELREF -a ${tumor.simpleName} -G $BWAGENOME
"""

stub:
Expand Down Expand Up @@ -230,7 +232,7 @@ process gunzip {

script:
"""
gunzip ${vcf} > ${tumorname}.tumorSV.vcf
gunzip -f ${vcf} > ${tumorname}.tumorSV.vcf
"""

stub:
Expand Down Expand Up @@ -291,7 +293,7 @@ process annotsv_tonly {
mkdir ${sv}

AnnotSV -SVinputFile ${somaticvcf} \
-genomeBuild $GENOME \
-genomeBuild $ANNOTSVGENOME \
-SVinputInfo 1 -outputFile ${tumorname} \
-outputDir ${sv}

Expand Down
122 changes: 78 additions & 44 deletions modules/local/trim_align.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Module-level settings looked up from the params.genomes entry selected by params.genome.
// (GENOMEREF was previously assigned twice with the same value; one assignment is enough.)
GENOMEREF = file(params.genomes[params.genome].genome)
// Pre-formatted known-sites flags, interpolated as-is into the BaseRecalibrator command.
KNOWNRECAL = params.genomes[params.genome].KNOWNRECAL
// Pre-formatted "-known <vcf>" flags, interpolated as-is into the indel realignment commands.
KNOWNINDELS = params.genomes[params.genome].KNOWNINDELS

process fastp {
container = "${params.containers.logan}"
Expand Down Expand Up @@ -77,6 +77,70 @@ process bwamem2 {



process indelrealign {
    /*
    Local realignment around indels, in two steps:
      1. RealignerTargetCreator writes the candidate intervals for the sample.
      2. IndelRealigner rewrites the BAM over those intervals.
    Both steps receive the same known-indel flags (KNOWNINDELS).
    */
    container "${params.containers.logan}"
    label 'process_long'

    input:
        tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai")

    output:
        tuple val(samplename), path("${samplename}.ir.bam"), path("${samplename}.ir.bai")

    script:
    // Launcher shared by both steps: the Java 8 JVM with a 16 GB heap, running
    // the jar named by the GATK_JAR environment variable (escaped so the shell,
    // not Groovy, expands it at run time).
    def gatk_launcher = "/usr/lib/jvm/java-8-openjdk-amd64/bin/java -Xmx16g -jar \$GATK_JAR"
    def input_bam     = "${samplename}.bam"
    def target_list   = "${samplename}.intervals"
    def realigned_bam = "${samplename}.ir.bam"
    """
    ${gatk_launcher} -T RealignerTargetCreator \
        -I ${input_bam} \
        -R ${GENOMEREF} \
        -o ${target_list} \
        -nt ${task.cpus} \
        ${KNOWNINDELS}

    ${gatk_launcher} -T IndelRealigner \
        -R ${GENOMEREF} \
        -I ${input_bam} \
        ${KNOWNINDELS} \
        -targetIntervals ${target_list} \
        -o ${realigned_bam}
    """

    stub:
    """
    touch ${samplename}.ir.bam ${samplename}.ir.bai
    """
}


process bqsr_ir {
    /*
    Runs BaseRecalibrator on an indel-realigned BAM, restricted to one
    interval (BED) file, and emits a recalibration table named after the
    sample and the BED file.
    */
    container = "${params.containers.logan}"
    label 'process_low'

    input:
        tuple val(samplename), path("${samplename}.ir.bam"), path("${samplename}.ir.bai"), path(bed)

    output:
        tuple val(samplename), path("${samplename}_${bed.simpleName}.recal_data.grp")

    script:
    def realigned_bam = "${samplename}.ir.bam"
    // One table per (sample, interval file) pair.
    def recal_table = "${samplename}_${bed.simpleName}.recal_data.grp"
    """
    gatk --java-options '-Xmx16g' BaseRecalibrator \
        --input ${realigned_bam} \
        --reference ${GENOMEREF} \
        ${KNOWNRECAL} \
        --output ${recal_table} \
        --intervals ${bed}
    """

    stub:
    """
    touch ${samplename}_${bed.simpleName}.recal_data.grp
    """
}

process bqsr {
/*
Base quality recalibration for all samples
Expand All @@ -103,7 +167,6 @@ process bqsr {
"""
touch ${samplename}_${bed.simpleName}.recal_data.grp
"""

}

process gatherbqsr {
Expand Down Expand Up @@ -131,16 +194,15 @@ process gatherbqsr {
"""
}


process applybqsr {
/*
Base quality recalibration for all samples to
*/
container = "${params.containers.logan}"
label 'process_low'
label 'process_long'

input:
tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path("${samplename}.recal_data.grp")
tuple val(samplename), path(bam), path(bai), path("${samplename}.recal_data.grp")

output:
tuple val(samplename), path("${samplename}.bqsr.bam"), path("${samplename}.bqsr.bai")
Expand All @@ -150,7 +212,7 @@ process applybqsr {
"""
gatk --java-options '-Xmx32g' ApplyBQSR \
--reference ${GENOMEREF} \
--input ${samplename}.bam \
--input ${bam} \
--bqsr-recal-file ${samplename}.recal_data.grp \
--output ${samplename}.bqsr.bam \
--use-jdk-inflater \
Expand All @@ -166,7 +228,6 @@ process applybqsr {
}



process samtoolsindex {
container = "${params.containers.logan}"
label 'process_medium'
Expand Down Expand Up @@ -198,48 +259,21 @@ process bamtocram_tonly {
tuple val(tumorname), path(tumor), path(tumorbai)

output:
path("${sample}.cram")

script:
"""
samtools view -@ $task.cpus -C -T $GENOMEREF -o ${sample}.cram {$tumor}.bam
"""
}

path("${tumorname}.cram"), path("${tumorname}.cram.crai")

/*
process indelrealign {
input:
tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai")

output:
tuple val(samplename), path("${samplename}.ir.bam")

script:

"""
/usr/bin/java -Xmx32g -jar \${GATK_JAR} -T RealignerTargetCreator \
-I ${samplename}.bam \
-R ${GENOMEREF} \
-o ${samplename}.intervals \
-nt 16 \
-known ${MILLSINDEL} -known ${SHAPEITINDEL}

/usr/bin/java -Xmx32g -jar \${GATK_JAR} -T IndelRealigner \
-R ${GENOMEREF} \
-I ${samplename}.bam \
-known ${MILLSINDEL} -known ${SHAPEITINDEL} \
--use_jdk_inflater \
--use_jdk_deflater \
-targetIntervals ${samplename}.intervals \
-o ${samplename}.ir.bam
samtools view -@ $task.cpus -C -T $GENOMEREF -o ${sample}.cram $tumor
samtools index ${tumorname}.cram -@ $task.cpus
"""



stub:
"""
touch ${samplename}.ir.bam
touch ${tumorname}.cram ${tumorname}.cram.crai
"""

}
*/




Loading
Loading