refactor!: merge wgs_{sv,cnv,mei}_calling into sv_calling_wgs (#275) #315

Merged: 4 commits, Jan 3, 2023
6 changes: 1 addition & 5 deletions docs/index.rst
@@ -76,11 +76,7 @@ Project Info
    step/variant_denovo_filtration
    step/variant_phasing
    step/variant_filtration
-   step/wgs_cnv_annotation
-   step/wgs_cnv_calling
-   step/wgs_mei_calling
-   step/wgs_sv_annotation
-   step/wgs_sv_calling
+   step/sv_calling_wgs
    step/wgs_sv_filtration
 
 
7 changes: 7 additions & 0 deletions docs/step/sv_calling_wgs.rst
.. _step_sv_calling_wgs:

===================
Germline SV Calling
===================

.. automodule:: snappy_pipeline.workflows.sv_calling_wgs
7 changes: 0 additions & 7 deletions docs/step/wgs_cnv_annotation.rst

This file was deleted.

7 changes: 0 additions & 7 deletions docs/step/wgs_cnv_calling.rst

This file was deleted.

7 changes: 0 additions & 7 deletions docs/step/wgs_mei_calling.rst

This file was deleted.

7 changes: 0 additions & 7 deletions docs/step/wgs_sv_calling.rst

This file was deleted.

30 changes: 2 additions & 28 deletions snappy_pipeline/apps/snappy_snake.py
@@ -47,30 +47,17 @@
     somatic_wgs_cnv_calling,
     somatic_wgs_sv_calling,
     sv_calling_targeted,
-    targeted_seq_cnv_annotation,
-    targeted_seq_cnv_export,
-    targeted_seq_mei_calling,
+    sv_calling_wgs,
     tcell_crg_report,
     varfish_export,
     variant_annotation,
     variant_calling,
     variant_checking,
     variant_combination,
     variant_denovo_filtration,
     variant_export_external,
     variant_filtration,
     variant_phasing,
-    wgs_cnv_annotation,
-    wgs_cnv_calling,
-    wgs_cnv_export_external,
-    wgs_cnv_filtration,
-    wgs_mei_annotation,
-    wgs_mei_calling,
-    wgs_mei_filtration,
-    wgs_sv_annotation,
-    wgs_sv_calling,
-    wgs_sv_export_external,
-    wgs_sv_filtration,
 )
 
 __author__ = "Manuel Holtgrewe <manuel.holtgrewe@bih-charite.de>"
@@ -112,31 +99,18 @@
     "somatic_variant_signatures": somatic_variant_signatures,
     "somatic_wgs_cnv_calling": somatic_wgs_cnv_calling,
     "somatic_wgs_sv_calling": somatic_wgs_sv_calling,
-    "targeted_seq_cnv_annotation": targeted_seq_cnv_annotation,
     "sv_calling_targeted": sv_calling_targeted,
-    "targeted_seq_cnv_export": targeted_seq_cnv_export,
-    "targeted_seq_mei_calling": targeted_seq_mei_calling,
+    "sv_calling_wgs": sv_calling_wgs,
     "tcell_crg_report": tcell_crg_report,
     "varfish_export": varfish_export,
     "variant_annotation": variant_annotation,
     "variant_calling": variant_calling,
     "variant_checking": variant_checking,
     "variant_combination": variant_combination,
     "variant_denovo_filtration": variant_denovo_filtration,
     "variant_export_external": variant_export_external,
     "variant_filtration": variant_filtration,
     "variant_phasing": variant_phasing,
-    "wgs_cnv_annotation": wgs_cnv_annotation,
-    "wgs_cnv_calling": wgs_cnv_calling,
-    "wgs_cnv_export_external": wgs_cnv_export_external,
-    "wgs_cnv_filtration": wgs_cnv_filtration,
-    "wgs_mei_annotation": wgs_mei_annotation,
-    "wgs_mei_calling": wgs_mei_calling,
-    "wgs_mei_filtration": wgs_mei_filtration,
-    "wgs_sv_annotation": wgs_sv_annotation,
-    "wgs_sv_calling": wgs_sv_calling,
-    "wgs_sv_export_external": wgs_sv_export_external,
-    "wgs_sv_filtration": wgs_sv_filtration,
 }
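This dictionary is how snappy-snake resolves the step name in a project's configuration to the workflow module to run; after this change, configurations must use ``sv_calling_wgs`` where they previously named ``wgs_sv_calling``, ``wgs_cnv_calling``, or ``wgs_mei_calling``. A minimal sketch of the lookup, assuming illustrative names (``STEP_TO_MODULE`` and ``resolve_step`` are not the verbatim identifiers in ``snappy_snake.py``):

# Illustrative sketch only; STEP_TO_MODULE and resolve_step() are assumed
# names standing in for the registry dict shown in the diff above.
def resolve_step(step_name: str):
    """Map a configured step name to its workflow module."""
    try:
        return STEP_TO_MODULE[step_name]
    except KeyError:
        # e.g. a config still naming the removed "wgs_sv_calling" step
        raise ValueError(f"Unknown pipeline step: {step_name}") from None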
17 changes: 17 additions & 0 deletions snappy_pipeline/workflows/abstract/common.py
@@ -1,5 +1,7 @@
 """Commonly used code and types"""
 
+from itertools import chain
+import re
 import typing
 
 from snakemake.io import Wildcards
@@ -60,3 +62,18 @@ def get_resource_usage(self, action: str) -> ResourceUsage:
         assert self.resource_usage_dict is not None, "resource_usage_dict not set!"
         assert action in self.resource_usage_dict, f"No resource usage entry for {action}"
         return self.resource_usage_dict[action]
+
+
+def augment_work_dir_with_output_links(
+    work_dir_dict: SnakemakeDict, log_files: typing.Optional[typing.List[str]] = None
+) -> SnakemakeDict:
+    """Return a copy of ``work_dir_dict`` with an added ``"output_links"`` key.
+
+    Optionally, the output links also include the paths in ``log_files``.
+    """
+    result = dict(work_dir_dict)
+    result["output_links"] = [
+        re.sub(r"^work/", "output/", work_path)
+        for work_path in chain(work_dir_dict.values(), log_files or [])
+    ]
+    return result
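In effect, the helper mirrors every ``work/`` path under ``output/``. A quick usage sketch (mapper, library, and path names are made up for illustration):

work_files = {
    "vcf": "work/bwa.delly2.P001/out/bwa.delly2.P001.vcf.gz",
    "vcf_tbi": "work/bwa.delly2.P001/out/bwa.delly2.P001.vcf.gz.tbi",
}
log_files = ["work/bwa.delly2.P001/log/bwa.delly2.P001.log"]

result = augment_work_dir_with_output_links(work_files, log_files)
# result["output_links"] == [
#     "output/bwa.delly2.P001/out/bwa.delly2.P001.vcf.gz",
#     "output/bwa.delly2.P001/out/bwa.delly2.P001.vcf.gz.tbi",
#     "output/bwa.delly2.P001/log/bwa.delly2.P001.log",
# ]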
1 change: 1 addition & 0 deletions snappy_pipeline/workflows/common/__init__.py
"""Code shared between workflows"""
128 changes: 128 additions & 0 deletions snappy_pipeline/workflows/common/delly.py
"""Workflow step parts for Delly.

These are used in both ``sv_calling_targeted`` and ``sv_calling_wgs``.
"""

from snappy_pipeline.utils import dictify
from snappy_pipeline.workflows.abstract import BaseStepPart
from snappy_pipeline.workflows.abstract.common import (
ForwardResourceUsageMixin,
ForwardSnakemakeFilesMixin,
augment_work_dir_with_output_links,
)
from snappy_pipeline.workflows.common.sv_calling import (
SvCallingGetLogFileMixin,
SvCallingGetResultFilesMixin,
)
from snappy_wrappers.resource_usage import ResourceUsage


class Delly2StepPart(
SvCallingGetResultFilesMixin,
SvCallingGetLogFileMixin,
ForwardSnakemakeFilesMixin,
ForwardResourceUsageMixin,
BaseStepPart,
):
"""Perform SV calling on exomes using Delly2"""

name = "delly2"
actions = ("call", "merge_calls", "genotype", "merge_genotypes")

_cheap_resource_usage = ResourceUsage(
threads=2,
time="4-00:00:00",
memory=f"{7 * 1024 * 2}M",
)
_normal_resource_usage = ResourceUsage(
threads=2,
time="7-00:00:00", # 7 days
memory=f"{20 * 1024 * 2}M",
)
resource_usage_dict = {
"call": _normal_resource_usage,
"merge_calls": _cheap_resource_usage,
"genotype": _normal_resource_usage,
"merge_genotypes": _cheap_resource_usage,
}

def __init__(self, parent):
super().__init__(parent)

self.index_ngs_library_to_pedigree = {}
for sheet in self.parent.shortcut_sheets:
self.index_ngs_library_to_pedigree.update(sheet.index_ngs_library_to_pedigree)

self.donor_ngs_library_to_pedigree = {}
for sheet in self.parent.shortcut_sheets:
self.donor_ngs_library_to_pedigree.update(sheet.donor_ngs_library_to_pedigree)

@dictify
def _get_input_files_call(self, wildcards):
ngs_mapping = self.parent.sub_workflows["ngs_mapping"]
token = f"{wildcards.mapper}.{wildcards.library_name}"
yield "bam", ngs_mapping(f"output/{token}/out/{token}.bam")

@dictify
def _get_output_files_call(self):
infix = "{mapper}.delly2_call.{library_name}"
yield "bcf", f"work/{infix}/out/{infix}.bcf"
yield "bcf_md5", f"work/{infix}/out/{infix}.bcf.md5"
yield "bcf_csi", f"work/{infix}/out/{infix}.bcf.csi"
yield "bcf_csi_md5", f"work/{infix}/out/{infix}.bcf.csi.md5"

@dictify
def _get_input_files_merge_calls(self, wildcards):
bcfs = []
pedigree = self.index_ngs_library_to_pedigree[wildcards.library_name]
for donor in pedigree.donors:
if donor.dna_ngs_library:
infix = f"{wildcards.mapper}.delly2_call.{donor.dna_ngs_library.name}"
bcfs.append(f"work/{infix}/out/{infix}.bcf")
yield "bcf", bcfs

@dictify
def _get_output_files_merge_calls(self):
infix = "{mapper}.delly2_merge_calls.{library_name}"
yield "bcf", f"work/{infix}/out/{infix}.bcf"
yield "bcf_md5", f"work/{infix}/out/{infix}.bcf.md5"
yield "bcf_csi", f"work/{infix}/out/{infix}.bcf.csi"
yield "bcf_csi_md5", f"work/{infix}/out/{infix}.bcf.csi.md5"

@dictify
def _get_input_files_genotype(self, wildcards):
yield from self._get_input_files_call(wildcards).items()
pedigree = self.donor_ngs_library_to_pedigree[wildcards.library_name]
infix = f"{wildcards.mapper}.delly2_merge_calls.{pedigree.index.dna_ngs_library.name}"
yield "bcf", f"work/{infix}/out/{infix}.bcf"

@dictify
def _get_output_files_genotype(self):
infix = "{mapper}.delly2_genotype.{library_name}"
yield "bcf", f"work/{infix}/out/{infix}.bcf"
yield "bcf_md5", f"work/{infix}/out/{infix}.bcf.md5"
yield "bcf_csi", f"work/{infix}/out/{infix}.bcf.csi"
yield "bcf_csi_md5", f"work/{infix}/out/{infix}.bcf.csi.md5"

@dictify
def _get_input_files_merge_genotypes(self, wildcards):
bcfs = []
pedigree = self.index_ngs_library_to_pedigree[wildcards.library_name]
for donor in pedigree.donors:
if donor.dna_ngs_library:
infix = f"{wildcards.mapper}.delly2_genotype.{donor.dna_ngs_library.name}"
bcfs.append(f"work/{infix}/out/{infix}.bcf")
yield "bcf", bcfs

@dictify
def _get_output_files_merge_genotypes(self):
infix = "{mapper}.delly2.{library_name}"
work_files = {
"vcf": f"work/{infix}/out/{infix}.vcf.gz",
"vcf_md5": f"work/{infix}/out/{infix}.vcf.gz.md5",
"vcf_tbi": f"work/{infix}/out/{infix}.vcf.gz.tbi",
"vcf_tbi_md5": f"work/{infix}/out/{infix}.vcf.gz.tbi.md5",
}
yield from augment_work_dir_with_output_links(
work_files, self.get_log_file("merge_genotypes").values()
).items()
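Read together, the four actions implement Delly2's per-pedigree workflow: per-sample calling, pedigree-wide merging of call sites, per-sample re-genotyping against the merged sites, and a final merge that produces the released VCF. For a made-up example with mapper ``bwa``, index library ``P001-N1-DNA1-WGS1``, and a second donor ``P002-N1-DNA1-WGS1``, the infix patterns above expand to:

# "call": one BCF per donor with a DNA library
work/bwa.delly2_call.P001-N1-DNA1-WGS1/out/bwa.delly2_call.P001-N1-DNA1-WGS1.bcf
work/bwa.delly2_call.P002-N1-DNA1-WGS1/out/bwa.delly2_call.P002-N1-DNA1-WGS1.bcf
# "merge_calls": one site list per pedigree, keyed by the index library
work/bwa.delly2_merge_calls.P001-N1-DNA1-WGS1/out/bwa.delly2_merge_calls.P001-N1-DNA1-WGS1.bcf
# "genotype": each donor re-genotyped against the merged sites
work/bwa.delly2_genotype.P001-N1-DNA1-WGS1/out/bwa.delly2_genotype.P001-N1-DNA1-WGS1.bcf
work/bwa.delly2_genotype.P002-N1-DNA1-WGS1/out/bwa.delly2_genotype.P002-N1-DNA1-WGS1.bcf
# "merge_genotypes": final VCF, mirrored to output/ via augment_work_dir_with_output_links()
work/bwa.delly2.P001-N1-DNA1-WGS1/out/bwa.delly2.P001-N1-DNA1-WGS1.vcf.gz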
@@ -7,8 +7,8 @@
 
 - ``helper_gcnv_build_model_wgs``
 - ``helper_gcnv_build_model_targeted_seq``
-- ``wgs_cnv_calling``
 - ``sv_calling_targeted``
+- ``sv_calling_wgs``
 
 We only implement calling in CASE mode, COHORT mode is only used for building the background
 model. However, note that we run the CASE mode on all samples from a given sheet. This may
@@ -5,7 +5,7 @@
 from snakemake.io import expand, touch
 
 from snappy_pipeline.utils import dictify, listify
-from snappy_pipeline.workflows.gcnv.gcnv_common import GcnvCommonStepPart
+from snappy_pipeline.workflows.common.gcnv.gcnv_common import GcnvCommonStepPart
 
 
 class AnnotateGcMixin:
@@ -13,7 +13,7 @@
 
 from snappy_pipeline.base import InvalidConfiguration, UnsupportedActionException
 from snappy_pipeline.utils import dictify, flatten, listify
-from snappy_pipeline.workflows.gcnv.gcnv_common import (
+from snappy_pipeline.workflows.common.gcnv.gcnv_common import (
     GcnvCommonStepPart,
     InconsistentLibraryKitsWarning,
 )
68 changes: 68 additions & 0 deletions snappy_pipeline/workflows/common/manta.py
"""Workflow step parts for Manta.

These are used in both ``sv_calling_targeted`` and ``sv_calling_wgs``.
"""

from snappy_pipeline.utils import dictify
from snappy_pipeline.workflows.abstract import BaseStepPart
from snappy_pipeline.workflows.abstract.common import (
ForwardResourceUsageMixin,
ForwardSnakemakeFilesMixin,
augment_work_dir_with_output_links,
)
from snappy_pipeline.workflows.common.sv_calling import (
SvCallingGetLogFileMixin,
SvCallingGetResultFilesMixin,
)
from snappy_wrappers.resource_usage import ResourceUsage


class MantaStepPart(
SvCallingGetLogFileMixin,
SvCallingGetResultFilesMixin,
ForwardSnakemakeFilesMixin,
ForwardResourceUsageMixin,
BaseStepPart,
):
"""Perform SV calling on exomes using Manta"""

name = "manta"
actions = ("run",)

resource_usage_dict = {
"run": ResourceUsage(
threads=16,
time="2-00:00:00",
memory=f"{int(3.75 * 1024 * 16)}M",
)
}

def __init__(self, parent):
super().__init__(parent)
#: Shortcuts from index NGS library name to Pedigree
self.index_ngs_library_to_pedigree = {}
for sheet in self.parent.shortcut_sheets:
self.index_ngs_library_to_pedigree.update(sheet.index_ngs_library_to_pedigree)

@dictify
def _get_input_files_run(self, wildcards):
ngs_mapping = self.parent.sub_workflows["ngs_mapping"]
bams = []
for donor in self.index_ngs_library_to_pedigree[wildcards.library_name].donors:
if donor.dna_ngs_library:
token = f"{wildcards.mapper}.{donor.dna_ngs_library.name}"
bams.append(ngs_mapping(f"output/{token}/out/{token}.bam"))
yield "bam", bams

@dictify
def _get_output_files_run(self):
infix = "{mapper}.manta.{library_name}"
work_files = {
"vcf": f"work/{infix}/out/{infix}.vcf.gz",
"vcf_md5": f"work/{infix}/out/{infix}.vcf.gz.md5",
"vcf_tbi": f"work/{infix}/out/{infix}.vcf.gz.tbi",
"vcf_tbi_md5": f"work/{infix}/out/{infix}.vcf.gz.tbi.md5",
}
yield from augment_work_dir_with_output_links(
work_files, self.get_log_file().values()
).items()
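The memory string appears to budget total RAM as threads times a per-thread allowance (3.75 GiB per thread here). A worked check of the arithmetic (illustration only, not project code):

threads = 16
gib_per_thread = 3.75
total_mb = int(gib_per_thread * 1024 * threads)
assert total_mb == 61440  # -> "61440M", the string passed to ResourceUsage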