diff --git a/snappy_pipeline/workflows/common/gcnv/gcnv_run.py b/snappy_pipeline/workflows/common/gcnv/gcnv_run.py index 3fffb9c3b..1388a2c67 100644 --- a/snappy_pipeline/workflows/common/gcnv/gcnv_run.py +++ b/snappy_pipeline/workflows/common/gcnv/gcnv_run.py @@ -516,7 +516,7 @@ def get_result_files(self): The function will skip pedigrees where samples have inconsistent library kits and print a warning. """ - if "gcnv" not in self.config["tools"]: + if "gcnv" not in self.config["tools"] and "gcnv" not in self.config["tools"].get("dna", {}): return # Get list with all result path template strings. diff --git a/snappy_pipeline/workflows/common/sv_calling.py b/snappy_pipeline/workflows/common/sv_calling.py index 0e045f1cb..3fd6183c9 100644 --- a/snappy_pipeline/workflows/common/sv_calling.py +++ b/snappy_pipeline/workflows/common/sv_calling.py @@ -17,7 +17,9 @@ def get_result_files(self): The implementation will return a list of all paths with prefix ``output/` that are returned by ``self.get_output_files()`` for all actions in ``self.actions``. """ - if self.name not in self.config["tools"]: + if self.name not in self.config["tools"] and not ( + hasattr(self.config, "tools") and self.name in self.config["tools"].get("dna", {}) + ): return # tool not enabled, no result files ngs_mapping_config = DictQuery(self.w_config).get("step_config/ngs_mapping") diff --git a/snappy_pipeline/workflows/sv_calling_wgs/Snakefile b/snappy_pipeline/workflows/sv_calling_wgs/Snakefile index fca344e55..af67ca8a4 100644 --- a/snappy_pipeline/workflows/sv_calling_wgs/Snakefile +++ b/snappy_pipeline/workflows/sv_calling_wgs/Snakefile @@ -4,7 +4,7 @@ import os from snappy_pipeline import expand_ref -from snappy_pipeline.workflows.sv_calling_wgs import WgsSvCallingWorkflow +from snappy_pipeline.workflows.sv_calling_wgs import SvCallingWgsWorkflow __author__ = "Manuel Holtgrewe " @@ -20,15 +20,11 @@ config, lookup_paths, config_paths = expand_ref("config.yaml", config) # WorkflowImpl Object Setup =================================================== -wf = WgsSvCallingWorkflow(workflow, config, lookup_paths, config_paths, os.getcwd()) +wf = SvCallingWgsWorkflow(workflow, config, lookup_paths, config_paths, os.getcwd()) # Rules ======================================================================= -localrules: - sv_calling_wgs_write_pedigree, - - rule all: input: wf.get_result_files(), @@ -39,7 +35,7 @@ rule all: # Write out pedigree file ----------------------------------------------------- -rule sv_calling_targeted_write_pedigree_run: +rule sv_calling_wgs_write_pedigree_run: output: wf.get_output_files("write_pedigree", "run"), run: @@ -53,7 +49,7 @@ rule sv_calling_targeted_write_pedigree_run: rule sv_calling_wgs_manta_run: input: - wf.get_input_files("manta", "run"), + unpack(wf.get_input_files("manta", "run")), output: **wf.get_output_files("manta", "run"), threads: wf.get_resource("manta", "run", "threads") @@ -63,7 +59,7 @@ rule sv_calling_wgs_manta_run: partition=wf.get_resource("manta", "run", "partition"), tmpdir=wf.get_resource("manta", "run", "tmpdir"), log: - wf.get_log_file("manta", "run"), + **wf.get_log_file("manta", "run"), wrapper: wf.wrapper_path("manta/germline_wgs") @@ -84,16 +80,15 @@ rule sv_calling_wgs_delly2_call: tmpdir=wf.get_resource("delly2", "call", "tmpdir"), params: step_key="sv_calling_wgs", - library_info=wf.substep_getattr("delly2", "get_library_extra_infos"), log: - wf.get_log_file("delly2", "call"), + **wf.get_log_file("delly2", "call"), wrapper: wf.wrapper_path("delly2/germline/call") rule sv_calling_wgs_delly2_merge_calls: input: - wf.get_input_files("delly2", "merge_calls"), + unpack(wf.get_input_files("delly2", "merge_calls")), output: **wf.get_output_files("delly2", "merge_calls"), threads: wf.get_resource("delly2", "merge_calls", "threads") @@ -105,7 +100,7 @@ rule sv_calling_wgs_delly2_merge_calls: params: step_key="sv_calling_wgs", log: - wf.get_log_file("delly2", "merge_calls"), + **wf.get_log_file("delly2", "merge_calls"), wrapper: wf.wrapper_path("delly2/germline/merge_calls") @@ -123,16 +118,15 @@ rule sv_calling_wgs_delly2_genotype: tmpdir=wf.get_resource("delly2", "genotype", "tmpdir"), params: step_key="sv_calling_wgs", - library_info=wf.substep_getattr("delly2", "get_library_extra_infos"), log: - wf.get_log_file("delly2", "genotype"), + **wf.get_log_file("delly2", "genotype"), wrapper: wf.wrapper_path("delly2/germline/genotype") rule sv_calling_wgs_delly2_merge_genotypes: input: - wf.get_input_files("delly2", "merge_genotypes"), + unpack(wf.get_input_files("delly2", "merge_genotypes")), output: **wf.get_output_files("delly2", "merge_genotypes"), threads: wf.get_resource("delly2", "merge_genotypes", "threads") @@ -144,163 +138,391 @@ rule sv_calling_wgs_delly2_merge_genotypes: params: step_key="sv_calling_wgs", log: - wf.get_log_file("delly2", "merge_genotypes"), + **wf.get_log_file("delly2", "merge_genotypes"), wrapper: wf.wrapper_path("delly2/germline/merge_genotypes") -# Run PB Honey Spots ---------------------------------------------------------- - - -rule sv_calling_wgs_pb_honey_spots_run: +## Run Sniffles ---------------------------------------------------------------- +# +# +# rule sv_calling_wgs_sniffles_run: +# input: +# wf.get_input_files("sniffles", "run"), +# output: +# **wf.get_output_files("sniffles", "run"), +# threads: wf.get_resource("sniffles", "run", "threads") +# resources: +# time=wf.get_resource("sniffles", "run", "time"), +# memory=wf.get_resource("sniffles", "run", "memory"), +# partition=wf.get_resource("sniffles", "run", "partition"), +# tmpdir=wf.get_resource("sniffles", "run", "tmpdir"), +# log: +# wf.get_log_file("sniffles", "run"), +# wrapper: +# wf.wrapper_path("sniffles/germline") +# +# +## Run Sniffles 2 -------------------------------------------------------------- +# +# +# rule sv_calling_wgs_sniffles2_bam_to_snf: +# input: +# unpack(wf.get_input_files("sniffles2", "bam_to_snf")), +# output: +# **wf.get_output_files("sniffles2", "bam_to_snf"), +# threads: wf.get_resource("sniffles2", "bam_to_snf", "threads") +# resources: +# time=wf.get_resource("sniffles2", "bam_to_snf", "time"), +# memory=wf.get_resource("sniffles2", "bam_to_snf", "memory"), +# partition=wf.get_resource("sniffles2", "bam_to_snf", "partition"), +# tmpdir=wf.get_resource("sniffles2", "bam_to_snf", "tmpdir"), +# log: +# wf.get_log_file("sniffles2", "bam_to_snf"), +# wrapper: +# wf.wrapper_path("sniffles2/germline/bam_to_snf") +# +# +# rule sv_calling_wgs_sniffles2_snf_to_vcf: +# input: +# unpack(wf.get_input_files("sniffles2", "snf_to_vcf")), +# output: +# **wf.get_output_files("sniffles2", "snf_to_vcf"), +# threads: wf.get_resource("sniffles2", "snf_to_vcf", "threads") +# resources: +# time=wf.get_resource("sniffles2", "snf_to_vcf", "time"), +# memory=wf.get_resource("sniffles2", "snf_to_vcf", "memory"), +# partition=wf.get_resource("sniffles2", "snf_to_vcf", "partition"), +# tmpdir=wf.get_resource("sniffles2", "snf_to_vcf", "tmpdir"), +# log: +# wf.get_log_file("sniffles2", "snf_to_vcf"), +# wrapper: +# wf.wrapper_path("sniffles2/germline/snf_to_vcf") + + +## PopDel Steps ---------------------------------------------------------------- +# +# +# ruleorder: sv_calling_wgs_popdel_reorder_vcf > sv_calling_wgs_popdel_concat_calls > sv_calling_wgs_popdel_call > sv_calling_wgs_popdel_profile +# +# +# rule sv_calling_wgs_popdel_profile: +# input: +# unpack(wf.get_input_files("popdel", "profile")), +# output: +# **wf.get_output_files("popdel", "profile"), +# threads: wf.get_resource("popdel", "profile", "threads") +# resources: +# time=wf.get_resource("popdel", "profile", "time"), +# memory=wf.get_resource("popdel", "profile", "memory"), +# partition=wf.get_resource("popdel", "profile", "partition"), +# tmpdir=wf.get_resource("popdel", "profile", "tmpdir"), +# wildcard_constraints: +# index_ngs_library=r"[^\.]+", +# log: +# **wf.get_log_file("popdel", "profile"), +# wrapper: +# wf.wrapper_path("popdel/profile") +# +# +# rule sv_calling_wgs_popdel_call: +# input: +# unpack(wf.get_input_files("popdel", "call")), +# output: +# **wf.get_output_files("popdel", "call"), +# threads: wf.get_resource("popdel", "call", "threads") +# resources: +# time=wf.get_resource("popdel", "call", "time"), +# memory=wf.get_resource("popdel", "call", "memory"), +# partition=wf.get_resource("popdel", "call", "partition"), +# tmpdir=wf.get_resource("popdel", "call", "tmpdir"), +# log: +# **wf.get_log_file("popdel", "call"), +# wrapper: +# wf.wrapper_path("popdel/call") +# +# +# rule sv_calling_wgs_popdel_concat_calls: +# input: +# unpack(wf.get_input_files("popdel", "concat_calls")), +# output: +# **wf.get_output_files("popdel", "concat_calls"), +# threads: wf.get_resource("popdel", "concat_calls", "threads") +# resources: +# time=wf.get_resource("popdel", "concat_calls", "time"), +# memory=wf.get_resource("popdel", "concat_calls", "memory"), +# partition=wf.get_resource("popdel", "concat_calls", "partition"), +# tmpdir=wf.get_resource("popdel", "concat_calls", "tmpdir"), +# log: +# **wf.get_log_file("popdel", "concat_calls"), +# wrapper: +# wf.wrapper_path("popdel/concat_calls") +# +# +# rule sv_calling_wgs_popdel_reorder_vcf: +# input: +# unpack(wf.get_input_files("popdel", "reorder_vcf")), +# output: +# **wf.get_output_files("popdel", "reorder_vcf"), +# threads: wf.get_resource("popdel", "reorder_vcf", "threads") +# resources: +# time=wf.get_resource("popdel", "reorder_vcf", "time"), +# memory=wf.get_resource("popdel", "reorder_vcf", "memory"), +# partition=wf.get_resource("popdel", "reorder_vcf", "partition"), +# tmpdir=wf.get_resource("popdel", "reorder_vcf", "tmpdir"), +# wildcard_constraints: +# index_ngs_library=r"[^\.]+", +# params: +# ped_members=wf.substep_getattr("popdel", "get_ped_members"), +# log: +# **wf.get_log_file("popdel", "reorder_vcf"), +# wrapper: +# wf.wrapper_path("popdel/reorder_vcf") + + +# Run Melt -------------------------------------------------------------------- + + +# Regular expression for wildcard constraints +RE_NO_DOT = r"[^.]+" + + +rule sv_calling_wgs_melt_preprocess: input: - wf.get_input_files("pb_honey_spots", "run"), + unpack(wf.get_input_files("melt", "preprocess")), output: - **wf.get_output_files("pb_honey_spots", "run"), - threads: wf.get_resource("pb_honey_spots", "run", "threads") + **wf.get_output_files("melt", "preprocess"), + wildcard_constraints: + mapper=RE_NO_DOT, + library_name=RE_NO_DOT, + me_type=RE_NO_DOT, + threads: wf.get_resource("melt", "preprocess", "threads") resources: - time=wf.get_resource("pb_honey_spots", "run", "time"), - memory=wf.get_resource("pb_honey_spots", "run", "memory"), - partition=wf.get_resource("pb_honey_spots", "run", "partition"), - tmpdir=wf.get_resource("pb_honey_spots", "run", "tmpdir"), + time=wf.get_resource("melt", "preprocess", "time"), + memory=wf.get_resource("melt", "preprocess", "memory"), + partition=wf.get_resource("melt", "preprocess", "partition"), log: - wf.get_log_file("pb_honey_spots", "run"), + **wf.get_log_file("melt", "preprocess"), + params: + step_key="sv_calling_wgs", wrapper: - wf.wrapper_path("pb_honey_spots/germline") + wf.wrapper_path("melt/preprocess") -# Run Sniffles ---------------------------------------------------------------- +rule sv_calling_wgs_melt_indiv_analysis: + input: + unpack(wf.get_input_files("melt", "indiv_analysis")), + output: + **wf.get_output_files("melt", "indiv_analysis"), + wildcard_constraints: + mapper=RE_NO_DOT, + library_name=RE_NO_DOT, + me_type=RE_NO_DOT, + threads: wf.get_resource("melt", "indiv_analysis", "threads") + resources: + time=wf.get_resource("melt", "indiv_analysis", "time"), + memory=wf.get_resource("melt", "indiv_analysis", "memory"), + partition=wf.get_resource("melt", "indiv_analysis", "partition"), + log: + **wf.get_log_file("melt", "indiv_analysis"), + params: + step_key="sv_calling_wgs", + wrapper: + wf.wrapper_path("melt/indiv_analysis") -rule sv_calling_wgs_sniffles_run: +rule sv_calling_wgs_melt_group_analysis: input: - wf.get_input_files("sniffles", "run"), + unpack(wf.get_input_files("melt", "group_analysis")), output: - **wf.get_output_files("sniffles", "run"), - threads: wf.get_resource("sniffles", "run", "threads") + **wf.get_output_files("melt", "group_analysis"), + wildcard_constraints: + mapper=RE_NO_DOT, + index_library_name=RE_NO_DOT, + me_type=RE_NO_DOT, + threads: wf.get_resource("melt", "group_analysis", "threads") resources: - time=wf.get_resource("sniffles", "run", "time"), - memory=wf.get_resource("sniffles", "run", "memory"), - partition=wf.get_resource("sniffles", "run", "partition"), - tmpdir=wf.get_resource("sniffles", "run", "tmpdir"), + time=wf.get_resource("melt", "group_analysis", "time"), + memory=wf.get_resource("melt", "group_analysis", "memory"), + partition=wf.get_resource("melt", "group_analysis", "partition"), log: - wf.get_log_file("sniffles", "run"), + **wf.get_log_file("melt", "group_analysis"), + params: + step_key="sv_calling_wgs", wrapper: - wf.wrapper_path("sniffles/germline") + wf.wrapper_path("melt/group_analysis") -# Run Sniffles 2 -------------------------------------------------------------- +rule sv_calling_wgs_melt_genotype: + input: + unpack(wf.get_input_files("melt", "genotype")), + output: + **wf.get_output_files("melt", "genotype"), + wildcard_constraints: + mapper=RE_NO_DOT, + index_library_name=RE_NO_DOT, + me_type=RE_NO_DOT, + threads: wf.get_resource("melt", "genotype", "threads") + resources: + time=wf.get_resource("melt", "genotype", "time"), + memory=wf.get_resource("melt", "genotype", "memory"), + partition=wf.get_resource("melt", "genotype", "partition"), + log: + **wf.get_log_file("melt", "genotype"), + params: + step_key="sv_calling_wgs", + wrapper: + wf.wrapper_path("melt/genotype") -rule sv_calling_wgs_sniffles2_bam_to_snf: +rule sv_calling_wgs_melt_make_vcf: input: - unpack(wf.get_input_files("sniffles2", "bam_to_snf")), + unpack(wf.get_input_files("melt", "make_vcf")), output: - **wf.get_output_files("sniffles2", "bam_to_snf"), - threads: wf.get_resource("sniffles2", "bam_to_snf", "threads") + **wf.get_output_files("melt", "make_vcf"), + wildcard_constraints: + mapper=RE_NO_DOT, + index_library_name=RE_NO_DOT, + threads: wf.get_resource("melt", "make_vcf", "threads") resources: - time=wf.get_resource("sniffles2", "bam_to_snf", "time"), - memory=wf.get_resource("sniffles2", "bam_to_snf", "memory"), - partition=wf.get_resource("sniffles2", "bam_to_snf", "partition"), - tmpdir=wf.get_resource("sniffles2", "bam_to_snf", "tmpdir"), + time=wf.get_resource("melt", "make_vcf", "time"), + memory=wf.get_resource("melt", "make_vcf", "memory"), + partition=wf.get_resource("melt", "make_vcf", "partition"), log: - wf.get_log_file("sniffles2", "bam_to_snf"), + **wf.get_log_file("melt", "make_vcf"), + params: + step_key="sv_calling_wgs", wrapper: - wf.wrapper_path("sniffles2/germline/bam_to_snf") + wf.wrapper_path("melt/make_vcf") -rule sv_calling_wgs_sniffles2_snf_to_vcf: +rule sv_calling_wgs_melt_merge_vcf: input: - unpack(wf.get_input_files("sniffles2", "snf_to_vcf")), + unpack(wf.get_input_files("melt", "merge_vcf")), output: - **wf.get_output_files("sniffles2", "snf_to_vcf"), - threads: wf.get_resource("sniffles2", "snf_to_vcf", "threads") + **wf.get_output_files("melt", "merge_vcf"), + wildcard_constraints: + mapper=RE_NO_DOT, + library_name=RE_NO_DOT, + threads: wf.get_resource("melt", "merge_vcf", "threads") resources: - time=wf.get_resource("sniffles2", "snf_to_vcf", "time"), - memory=wf.get_resource("sniffles2", "snf_to_vcf", "memory"), - partition=wf.get_resource("sniffles2", "snf_to_vcf", "partition"), - tmpdir=wf.get_resource("sniffles2", "snf_to_vcf", "tmpdir"), + time=wf.get_resource("melt", "merge_vcf", "time"), + memory=wf.get_resource("melt", "merge_vcf", "memory"), + partition=wf.get_resource("melt", "merge_vcf", "partition"), log: - wf.get_log_file("sniffles2", "snf_to_vcf"), + **wf.get_log_file("melt", "merge_vcf"), + params: + step_key="sv_calling_wgs", wrapper: - wf.wrapper_path("sniffles2/germline/snf_to_vcf") + wf.wrapper_path("melt/merge_vcf") -# PopDel Steps ---------------------------------------------------------------- +# GATK-gCNV ------------------------------------------------------------------- -ruleorder: sv_calling_wgs_popdel_reorder_vcf > sv_calling_wgs_popdel_concat_calls > sv_calling_wgs_popdel_call > sv_calling_wgs_popdel_profile +rule sv_calling_wgs_gcnv_preprocess_intervals: + input: + unpack(wf.get_input_files("gcnv", "preprocess_intervals")), + output: + **wf.get_output_files("gcnv", "preprocess_intervals"), + threads: wf.get_resource("gcnv", "preprocess_intervals", "threads") + resources: + time=wf.get_resource("gcnv", "preprocess_intervals", "time"), + memory=wf.get_resource("gcnv", "preprocess_intervals", "memory"), + partition=wf.get_resource("gcnv", "preprocess_intervals", "partition"), + tmpdir=wf.get_resource("gcnv", "preprocess_intervals", "tmpdir"), + log: + wf.get_log_file("gcnv", "preprocess_intervals"), + wrapper: + wf.wrapper_path("gcnv/preprocess_intervals") -rule sv_calling_wgs_popdel_profile: +rule sv_calling_wgs_gcnv_coverage: input: - unpack(wf.get_input_files("popdel", "profile")), + unpack(wf.get_input_files("gcnv", "coverage")), output: - **wf.get_output_files("popdel", "profile"), - threads: wf.get_resource("popdel", "profile", "threads") + **wf.get_output_files("gcnv", "coverage"), + threads: wf.get_resource("gcnv", "coverage", "threads") resources: - time=wf.get_resource("popdel", "profile", "time"), - memory=wf.get_resource("popdel", "profile", "memory"), - partition=wf.get_resource("popdel", "profile", "partition"), - tmpdir=wf.get_resource("popdel", "profile", "tmpdir"), - wildcard_constraints: - index_ngs_library=r"[^\.]+", + time=wf.get_resource("gcnv", "coverage", "time"), + memory=wf.get_resource("gcnv", "coverage", "memory"), + partition=wf.get_resource("gcnv", "coverage", "partition"), + tmpdir=wf.get_resource("gcnv", "coverage", "tmpdir"), log: - **wf.get_log_file("popdel", "profile"), + wf.get_log_file("gcnv", "coverage"), wrapper: - wf.wrapper_path("popdel/profile") + wf.wrapper_path("gcnv/coverage") -rule sv_calling_wgs_popdel_call: +rule sv_calling_wgs_gcnv_contig_ploidy: input: - unpack(wf.get_input_files("popdel", "call")), + unpack(wf.get_input_files("gcnv", "contig_ploidy")), output: - **wf.get_output_files("popdel", "call"), - threads: wf.get_resource("popdel", "call", "threads") + **wf.get_output_files("gcnv", "contig_ploidy"), + threads: wf.get_resource("gcnv", "contig_ploidy", "threads") resources: - time=wf.get_resource("popdel", "call", "time"), - memory=wf.get_resource("popdel", "call", "memory"), - partition=wf.get_resource("popdel", "call", "partition"), - tmpdir=wf.get_resource("popdel", "call", "tmpdir"), + time=wf.get_resource("gcnv", "contig_ploidy", "time"), + memory=wf.get_resource("gcnv", "contig_ploidy", "memory"), + partition=wf.get_resource("gcnv", "contig_ploidy", "partition"), + tmpdir=wf.get_resource("gcnv", "contig_ploidy", "tmpdir"), log: - **wf.get_log_file("popdel", "call"), + wf.get_log_file("gcnv", "contig_ploidy"), + params: + args=wf.get_params("gcnv", "contig_ploidy"), wrapper: - wf.wrapper_path("popdel/call") + wf.wrapper_path("gcnv/contig_ploidy_case_mode") -rule sv_calling_wgs_popdel_concat_calls: +rule sv_calling_wgs_gcnv_call_cnvs: input: - unpack(wf.get_input_files("popdel", "concat_calls")), + unpack(wf.get_input_files("gcnv", "call_cnvs")), output: - **wf.get_output_files("popdel", "concat_calls"), - threads: wf.get_resource("popdel", "concat_calls", "threads") + **wf.get_output_files("gcnv", "call_cnvs"), + threads: wf.get_resource("gcnv", "call_cnvs", "threads") resources: - time=wf.get_resource("popdel", "concat_calls", "time"), - memory=wf.get_resource("popdel", "concat_calls", "memory"), - partition=wf.get_resource("popdel", "concat_calls", "partition"), - tmpdir=wf.get_resource("popdel", "concat_calls", "tmpdir"), + time=wf.get_resource("gcnv", "call_cnvs", "time"), + memory=wf.get_resource("gcnv", "call_cnvs", "memory"), + partition=wf.get_resource("gcnv", "call_cnvs", "partition"), + tmpdir=wf.get_resource("gcnv", "call_cnvs", "tmpdir"), log: - **wf.get_log_file("popdel", "concat_calls"), + wf.get_log_file("gcnv", "call_cnvs"), + params: + args=wf.get_params("gcnv", "call_cnvs"), wrapper: - wf.wrapper_path("popdel/concat_calls") + wf.wrapper_path("gcnv/call_cnvs_case_mode") -rule sv_calling_wgs_popdel_reorder_vcf: +rule sv_calling_wgs_gcnv_post_germline_calls: input: - unpack(wf.get_input_files("popdel", "reorder_vcf")), + unpack(wf.get_input_files("gcnv", "post_germline_calls")), output: - **wf.get_output_files("popdel", "reorder_vcf"), - threads: wf.get_resource("popdel", "reorder_vcf", "threads") + **wf.get_output_files("gcnv", "post_germline_calls"), + threads: wf.get_resource("gcnv", "post_germline_calls", "threads") resources: - time=wf.get_resource("popdel", "reorder_vcf", "time"), - memory=wf.get_resource("popdel", "reorder_vcf", "memory"), - partition=wf.get_resource("popdel", "reorder_vcf", "partition"), - tmpdir=wf.get_resource("popdel", "reorder_vcf", "tmpdir"), - wildcard_constraints: - index_ngs_library=r"[^\.]+", + time=wf.get_resource("gcnv", "post_germline_calls", "time"), + memory=wf.get_resource("gcnv", "post_germline_calls", "memory"), + partition=wf.get_resource("gcnv", "post_germline_calls", "partition"), + tmpdir=wf.get_resource("gcnv", "post_germline_calls", "tmpdir"), + log: + wf.get_log_file("gcnv", "post_germline_calls"), params: - ped_members=wf.substep_getattr("popdel", "get_ped_members"), + args=wf.get_params("gcnv", "post_germline_calls"), + wrapper: + wf.wrapper_path("gcnv/post_germline_calls_case_mode") + + +rule sv_calling_wgs_gcnv_joint_germline_cnv_segmentation: + input: + unpack(wf.get_input_files("gcnv", "joint_germline_cnv_segmentation")), + output: + **wf.get_output_files("gcnv", "joint_germline_cnv_segmentation"), + threads: wf.get_resource("gcnv", "joint_germline_cnv_segmentation", "threads") + resources: + time=wf.get_resource("gcnv", "joint_germline_cnv_segmentation", "time"), + memory=wf.get_resource("gcnv", "joint_germline_cnv_segmentation", "memory"), + partition=wf.get_resource("gcnv", "joint_germline_cnv_segmentation", "partition"), + tmpdir=wf.get_resource("gcnv", "joint_germline_cnv_segmentation", "tmpdir"), log: - **wf.get_log_file("popdel", "reorder_vcf"), + **wf.get_log_file("gcnv", "joint_germline_cnv_segmentation"), wrapper: - wf.wrapper_path("popdel/reorder_vcf") + wf.wrapper_path("gcnv/joint_germline_cnv_segmentation") diff --git a/snappy_pipeline/workflows/sv_calling_wgs/__init__.py b/snappy_pipeline/workflows/sv_calling_wgs/__init__.py index 69d0a5cea..c08bc724f 100644 --- a/snappy_pipeline/workflows/sv_calling_wgs/__init__.py +++ b/snappy_pipeline/workflows/sv_calling_wgs/__init__.py @@ -4,7 +4,6 @@ from collections import OrderedDict from biomedsheets.shortcuts import GermlineCaseSheet, is_not_background -from snakemake.io import touch from snappy_pipeline.utils import dictify, listify from snappy_pipeline.workflows.abstract import ( @@ -20,6 +19,7 @@ from snappy_pipeline.workflows.common.delly import Delly2StepPart from snappy_pipeline.workflows.common.gcnv.gcnv_run import RunGcnvStepPart from snappy_pipeline.workflows.common.manta import MantaStepPart +from snappy_pipeline.workflows.common.melt import MeltStepPart from snappy_pipeline.workflows.common.sv_calling import ( SvCallingGetLogFileMixin, SvCallingGetResultFilesMixin, @@ -242,157 +242,6 @@ def get_ped_members(self, wildcards): ) -class MeltStepPart( - SvCallingGetResultFilesMixin, - SvCallingGetLogFileMixin, - ForwardSnakemakeFilesMixin, - ForwardResourceUsageMixin, - BaseStepPart, -): - """MEI calling using MELT - - We implement the workflow as per-pedigree calling. Generally, this leads to consistent - positions within each pedigree but not necessarily across the whole cohort. - - Note that MELT is not free software, so further setup is needed. - """ - - name = "melt" - actions = ( - "preprocess", - "indiv_analysis", - "group_analysis", - "genotype", - "make_vcf", - "merge_vcf", - "reorder_vcf", - ) - - _resource_usage = ResourceUsage( - threads=6, - time="1-00:00:00", - memory=f"{int(3.75 * 1024 * 6)}M", - ) - resource_usage_dict = { - "preprocess": _resource_usage, - "indiv_analysis": _resource_usage, - "group_analysis": _resource_usage, - "genotype": _resource_usage, - "make_vcf": _resource_usage, - "merge_vcf": _resource_usage, - "reorder_vcf": _resource_usage, - } - - def __init__(self, parent): - super().__init__(parent) - #: All individual's primary NGS libraries - self.all_dna_ngs_libraries = [] - for sheet in self.parent.shortcut_sheets: - for donor in sheet.donors: - if donor.dna_ngs_library: - self.all_dna_ngs_libraries.append(donor.dna_ngs_library.name) - #: Linking NGS libraries to pedigree - self.index_ngs_library_to_pedigree = OrderedDict() - for sheet in filter(is_not_background, self.parent.shortcut_sheets): - self.index_ngs_library_to_pedigree.update(sheet.index_ngs_library_to_pedigree) - - @dictify - def _get_input_files_preprocess(self, wildcards): - ngs_mapping = self.parent.sub_workflows["ngs_mapping"] - infix = f"{wildcards.mapper}.{wildcards.library_name}" - yield "bam", ngs_mapping(f"output/{infix}/out/{infix}.bam") - - @dictify - def _get_output_files_preprocess(self): - # Note that mapper is not part of the output BAM file as MELT infers sample file from BAM - # file name instead of using sample name from BAM header. - prefix = "work/{mapper}.melt.preprocess.{library_name}/out/{library_name}" - yield "orig_bam", f"{prefix}.bam" - yield "orig_bai", f"{prefix}.bam.bai" - yield "disc_bam", f"{prefix}.bam.disc" - yield "disc_bai", f"{prefix}.bam.disc.bai" - yield "disc_fq", f"{prefix}.bam.fq" - - @dictify - def _get_input_files_indiv_analysis(self, wildcards): - infix = f"{wildcards.mapper}.melt.preprocess.{wildcards.library_name}" - yield "orig_bam", f"work/{infix}/out/{wildcards.library_name}.bam" - yield "disc_bam", f"work/{infix}/out/{wildcards.library_name}.bam.disc" - - @dictify - def _get_output_files_indiv_analysis(self): - infix = "{mapper}.melt.indiv_analysis.{me_type}" - yield "done", touch(f"work/{infix}/out/.done.{{library_name}}") - - @listify - def _get_input_files_group_analysis(self, wildcards): - for library_name in self.all_dna_ngs_libraries: - infix = f"{wildcards.mapper}.melt.indiv_analysis.{wildcards.me_type}" - yield f"work/{infix}/out/.done.{library_name}" - - @dictify - def _get_output_files_group_analysis(self): - yield "done", touch("work/{mapper}.melt.group_analysis.{me_type}/out/.done") - - @dictify - def _get_input_files_genotype(self, wildcards): - infix_done = f"{wildcards.mapper}.melt.group_analysis.{wildcards.me_type}" - yield "done", f"work/{infix_done}/out/.done".format(**wildcards) - infix_bam = f"{wildcards.mapper}.melt.preprocess.{wildcards.library_name}" - yield "bam", f"work/{infix_bam}/out/{wildcards.library_name}.bam" - - @dictify - def _get_output_files_genotype(self): - yield "done", touch("work/{mapper}.melt.genotype.{me_type}/out/.done.{library_name}") - - @dictify - def _get_input_files_make_vcf(self, wildcards): - infix = f"{wildcards.mapper}.melt.group_analysis.{wildcards.me_type}" - yield "group_analysis", f"work/{infix}/out/.done" - paths = [] - for library_name in self.all_dna_ngs_libraries: - infix = f"{wildcards.mapper}.melt.genotype.{wildcards.me_type}" - yield f"work/{infix}/out/.done.{library_name}" - yield "genotype", paths - - @dictify - def _get_output_files_make_vcf(self): - infix = "{mapper}.melt.genotype.{me_type}" - yield "list_txt", f"work/{infix}/out/list.txt" - yield "done", touch(f"work/{infix}/out/.done") - yield "vcf", f"work/{infix}.final_comp.vcf.gz" - yield "vcf_tbi", f"work/{infix}.final_comp.vcf.gz.tbi" - - @dictify - def _get_input_files_merge_vcf(self, wildcards): - vcfs = [] - for me_type in self.config["melt"]["me_types"]: - infix = f"{wildcards.mapper}.melt.merge_vcf.{wildcards.me_type}" - vcfs.append(f"work/{infix}/out/{me_type}.final_comp.vcf.gz") - yield "vcf", vcfs - - @dictify - def _get_output_files_merge_vcf(self): - infix = "{mapper}.melt.merge_vcf" - yield "vcf", f"work/{infix}/out/{infix}.vcf.gz" - yield "vcf_md5", f"work/{infix}/out/{infix}.vcf.gz.md5" - yield "vcf_tbi", f"work/{infix}/out/{infix}.vcf.gz.tbi" - yield "vcf_tbi_md5", f"work/{infix}/out/{infix}.vcf.gz.tbi.md5" - - @dictify - def _get_input_files_reorder_vcf(self, wildcards): - infix = f"{wildcards.mapper}.melt.merge_vcf" - yield "vcf", f"work/{infix}/out/{infix}.vcf.gz" - - @dictify - def _get_output_files_reorder_vcf(self): - infix = "{mapper}.melt.{index_library_name}" - yield "vcf", f"work/{infix}/out/{infix}.vcf.gz" - yield "vcf_md5", f"work/{infix}/out/{infix}.vcf.gz.md5" - yield "vcf_tbi", f"work/{infix}/out/{infix}.vcf.gz.tbi" - yield "vcf_tbi_md5", f"work/{infix}/out/{infix}.vcf.gz.tbi.md5" - - class Sniffles2StepPart(BaseStepPart): """WGS SV identification using Sniffles 2""" @@ -468,10 +317,10 @@ def __init__(self, workflow, config, config_lookup_paths, config_paths, workdir) ( Delly2StepPart, MantaStepPart, - PopDelStepPart, + # PopDelStepPart, GcnvWgsStepPart, MeltStepPart, - Sniffles2StepPart, + # Sniffles2StepPart, WritePedigreeStepPart, ) ) diff --git a/snappy_wrappers/wrappers/melt/genotype/wrapper.py b/snappy_wrappers/wrappers/melt/genotype/wrapper.py index ba386778c..1b2a70434 100644 --- a/snappy_wrappers/wrappers/melt/genotype/wrapper.py +++ b/snappy_wrappers/wrappers/melt/genotype/wrapper.py @@ -12,7 +12,7 @@ set -x # ----------------------------------------------------------------------------- -JAR={snakemake.config[step_config][sv_calling_targeted][melt][jar_file]} +JAR={melt_config[jar_file]} ME_REFS={melt_config[me_refs_path]} ME_INFIX={melt_config[me_refs_infix]} diff --git a/snappy_wrappers/wrappers/melt/group_analysis/wrapper.py b/snappy_wrappers/wrappers/melt/group_analysis/wrapper.py index a99e4959b..98f5384d3 100644 --- a/snappy_wrappers/wrappers/melt/group_analysis/wrapper.py +++ b/snappy_wrappers/wrappers/melt/group_analysis/wrapper.py @@ -12,7 +12,7 @@ set -x # ----------------------------------------------------------------------------- -JAR={snakemake.config[step_config][sv_calling_targeted][melt][jar_file]} +JAR={melt_config[jar_file]} ME_REFS={melt_config[me_refs_path]} ME_INFIX={melt_config[me_refs_infix]} diff --git a/snappy_wrappers/wrappers/melt/indiv_analysis/wrapper.py b/snappy_wrappers/wrappers/melt/indiv_analysis/wrapper.py index 6ebed325c..34d41106e 100644 --- a/snappy_wrappers/wrappers/melt/indiv_analysis/wrapper.py +++ b/snappy_wrappers/wrappers/melt/indiv_analysis/wrapper.py @@ -13,7 +13,7 @@ set -x # ----------------------------------------------------------------------------- -JAR={snakemake.config[step_config][sv_calling_targeted][melt][jar_file]} +JAR={melt_config[jar_file]} ME_REFS={melt_config[me_refs_path]} ME_INFIX={melt_config[me_refs_infix]} diff --git a/snappy_wrappers/wrappers/melt/make_vcf/wrapper.py b/snappy_wrappers/wrappers/melt/make_vcf/wrapper.py index b78b173de..1c0437827 100644 --- a/snappy_wrappers/wrappers/melt/make_vcf/wrapper.py +++ b/snappy_wrappers/wrappers/melt/make_vcf/wrapper.py @@ -12,7 +12,7 @@ set -x # ----------------------------------------------------------------------------- -JAR={snakemake.config[step_config][sv_calling_targeted][melt][jar_file]} +JAR={melt_config[jar_file]} ME_REFS={melt_config[me_refs_path]} ME_INFIX={melt_config[me_refs_infix]} diff --git a/snappy_wrappers/wrappers/melt/preprocess/wrapper.py b/snappy_wrappers/wrappers/melt/preprocess/wrapper.py index 37a37d76e..076a52615 100644 --- a/snappy_wrappers/wrappers/melt/preprocess/wrapper.py +++ b/snappy_wrappers/wrappers/melt/preprocess/wrapper.py @@ -2,6 +2,8 @@ __author__ = "Manuel Holtgrewe " +melt_config = snakemake.config["step_config"][snakemake.params.step_key]["melt"] + shell( r""" # ----------------------------------------------------------------------------- @@ -13,7 +15,7 @@ ln -sr {snakemake.input.bam} {snakemake.output.orig_bam} ln -sr {snakemake.input.bai} {snakemake.output.orig_bai} -JAR={snakemake.config[step_config][sv_calling_targeted][melt][jar_file]} +JAR={melt_config[jar_file]} java -Xmx4G -jar $JAR \ Preprocess \