From be97c44da221362bf31e4633b13801d0ed7c3079 Mon Sep 17 00:00:00 2001 From: William Harvey Date: Fri, 17 Nov 2023 10:33:07 -0800 Subject: [PATCH] clair3 defaults --- 202201-6445530 | 1 + workflow/rules/common.smk | 13 ++++++++++++- workflow/rules/variant_calling.smk | 15 ++++++++++++--- 3 files changed, 25 insertions(+), 4 deletions(-) create mode 160000 202201-6445530 diff --git a/202201-6445530 b/202201-6445530 new file mode 160000 index 0000000..6445530 --- /dev/null +++ b/202201-6445530 @@ -0,0 +1 @@ +Subproject commit 64455305820080693e6e7e909ba6dcc1cd4572a8 diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 56df48a..768a68c 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -34,6 +34,17 @@ def find_clair_chrs(wildcards): chrom=chroms, ) +def find_clair_gvcf(wildcards): + if config.get("CHRS") == None: + with open(f"{REF}.fai", "r") as infile: + chroms = [line.split("\t")[0] for line in infile] + else: + chroms = config.get("CHRS") + return expand( + "tmp/variants/{{sample}}/{chrom}/merge_output.gvcf.gz", + chrom=chroms, + ) + def concatenate_fastq(wildcards): FOFN = manifest_df.at[wildcards.sample, "FOFN"] @@ -64,4 +75,4 @@ def find_aln_bai(wildcards): return rules.index_aln.output.merged_bai else: print("phase state must be either minimap2 (unphased) or longphase (phased)") - exit(1) \ No newline at end of file + exit(1) diff --git a/workflow/rules/variant_calling.smk b/workflow/rules/variant_calling.smk index 4a5e987..e189d6f 100644 --- a/workflow/rules/variant_calling.smk +++ b/workflow/rules/variant_calling.smk @@ -6,31 +6,39 @@ rule clair_chr: ref=REF, output: vcf="tmp/variants/{sample}/{chrom}/phased_merge_output.vcf.gz", + gvcf="tmp/variants/{sample}/{chrom}/merge_output.gvcf.gz", log: "log/{sample}_{chrom}.clair.log", conda: "../envs/clair3.yaml" threads: 8 resources: - mem=2, + mem=32, hrs=24, disk_free=1, shell: """ - run_clair3.sh --bam_fn={input.merged_bam} --sample_name={wildcards.sample} --ref_fn={input.ref} --threads={threads} --platform=ont --model_path=$(dirname $( which run_clair3.sh ) )/models/ont_guppy5 --output=$(dirname {output.vcf}) --ctg_name={wildcards.chrom} --enable_phasing + run_clair3.sh --bam_fn={input.merged_bam} --sample_name={wildcards.sample} --ref_fn={input.ref} --threads={threads} --platform=ont --model_path=$(dirname $( which run_clair3.sh ) )/models/ont_guppy5 --output=$(dirname {output.vcf}) --ctg_name={wildcards.chrom} --enable_phasing --gvcf if [[ ( -f $( echo {output.vcf} | sed 's/phased_//' ) ) && ( ! -f {output.vcf} ) ]]; then cp $( echo {output.vcf} | sed 's/phased_//' ) {output.vcf} elif [[ ( ! -f $( echo {output.vcf} | sed 's/phased_//' ) ) && ( ! -f {output.vcf} ) ]]; then zcat $(dirname {output.vcf})/pileup.vcf.gz | grep ^# | bgzip -c > {output.vcf} fi + if [[ ( -f $( echo {output.vcf} | sed 's/phased_//' ) ) && ( ! -f {output.gvcf} ) ]]; then + cp $( echo {output.vcf} | sed 's/phased_//' ) {output.gvcf} + elif [[ ( ! -f $( echo {output.vcf} | sed 's/phased_//' ) ) && ( ! -f {output.gvcf} ) ]]; then + zcat $(dirname {output.vcf})/pileup.vcf.gz | grep ^# | bgzip -c > {output.gvcf} + fi """ rule concat_clair: input: vcf=find_clair_chrs, + gvcf=find_clair_gvcf, output: vcf=temp("variants/{sample}/{sample}.clair3.vcf"), + gvcf="variants/{sample}/{sample}.clair3.gvcf.gz", envmodules: "modules", "modules-init", @@ -48,6 +56,7 @@ rule concat_clair: shell: """ bcftools concat -O v -o {output.vcf} {input.vcf} + bcftools concat -O v {input.gvcf} | bcftools sort -O v | bgzip -c > {output.gvcf} """ @@ -69,7 +78,7 @@ rule sniffles: conda: "../envs/sniffles.yaml" resources: - mem=10, + mem=24, hrs=24, disk_free=1, threads: 1