refactor!: merge wgs_{sv,cnv,mei}_calling into sv_calling_wgs (#275) #315

Merged: 4 commits, Jan 3, 2023
6 changes: 1 addition & 5 deletions docs/index.rst
@@ -76,11 +76,7 @@ Project Info
    step/variant_denovo_filtration
    step/variant_phasing
    step/variant_filtration
-   step/wgs_cnv_annotation
-   step/wgs_cnv_calling
-   step/wgs_mei_calling
-   step/wgs_sv_annotation
-   step/wgs_sv_calling
+   step/sv_calling_wgs
    step/wgs_sv_filtration
 
 
7 changes: 7 additions & 0 deletions docs/step/sv_calling_wgs.rst
.. _step_sv_calling_wgs:

===================
Germline SV Calling
===================

.. automodule:: snappy_pipeline.workflows.sv_calling_wgs
7 changes: 0 additions & 7 deletions docs/step/wgs_cnv_annotation.rst

This file was deleted.

7 changes: 0 additions & 7 deletions docs/step/wgs_cnv_calling.rst

This file was deleted.

7 changes: 0 additions & 7 deletions docs/step/wgs_mei_calling.rst

This file was deleted.

7 changes: 0 additions & 7 deletions docs/step/wgs_sv_calling.rst

This file was deleted.

30 changes: 2 additions & 28 deletions snappy_pipeline/apps/snappy_snake.py
@@ -47,30 +47,17 @@
     somatic_wgs_cnv_calling,
     somatic_wgs_sv_calling,
     sv_calling_targeted,
-    targeted_seq_cnv_annotation,
-    targeted_seq_cnv_export,
-    targeted_seq_mei_calling,
+    sv_calling_wgs,
     tcell_crg_report,
     varfish_export,
     variant_annotation,
     variant_calling,
     variant_checking,
     variant_combination,
     variant_denovo_filtration,
     variant_export_external,
     variant_filtration,
     variant_phasing,
-    wgs_cnv_annotation,
-    wgs_cnv_calling,
-    wgs_cnv_export_external,
-    wgs_cnv_filtration,
-    wgs_mei_annotation,
-    wgs_mei_calling,
-    wgs_mei_filtration,
-    wgs_sv_annotation,
-    wgs_sv_calling,
-    wgs_sv_export_external,
-    wgs_sv_filtration,
 )
 
 __author__ = "Manuel Holtgrewe <manuel.holtgrewe@bih-charite.de>"
@@ -112,31 +99,18 @@
     "somatic_variant_signatures": somatic_variant_signatures,
     "somatic_wgs_cnv_calling": somatic_wgs_cnv_calling,
     "somatic_wgs_sv_calling": somatic_wgs_sv_calling,
-    "targeted_seq_cnv_annotation": targeted_seq_cnv_annotation,
     "sv_calling_targeted": sv_calling_targeted,
-    "targeted_seq_cnv_export": targeted_seq_cnv_export,
-    "targeted_seq_mei_calling": targeted_seq_mei_calling,
+    "sv_calling_wgs": sv_calling_wgs,
     "tcell_crg_report": tcell_crg_report,
     "varfish_export": varfish_export,
     "variant_annotation": variant_annotation,
     "variant_calling": variant_calling,
     "variant_checking": variant_checking,
     "variant_combination": variant_combination,
     "variant_denovo_filtration": variant_denovo_filtration,
     "variant_export_external": variant_export_external,
     "variant_filtration": variant_filtration,
     "variant_phasing": variant_phasing,
-    "wgs_cnv_annotation": wgs_cnv_annotation,
-    "wgs_cnv_calling": wgs_cnv_calling,
-    "wgs_cnv_export_external": wgs_cnv_export_external,
-    "wgs_cnv_filtration": wgs_cnv_filtration,
-    "wgs_mei_annotation": wgs_mei_annotation,
-    "wgs_mei_calling": wgs_mei_calling,
-    "wgs_mei_filtration": wgs_mei_filtration,
-    "wgs_sv_annotation": wgs_sv_annotation,
-    "wgs_sv_calling": wgs_sv_calling,
-    "wgs_sv_export_external": wgs_sv_export_external,
-    "wgs_sv_filtration": wgs_sv_filtration,
 }
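This dictionary is how snappy-snake resolves the step name in a project's configuration to the workflow module to run; after this change, configurations must use ``sv_calling_wgs`` where they previously named ``wgs_sv_calling``, ``wgs_cnv_calling``, or ``wgs_mei_calling``. A minimal sketch of the lookup, assuming illustrative names (``STEP_TO_MODULE`` and ``resolve_step`` are not the verbatim identifiers in ``snappy_snake.py``):

# Illustrative sketch only; STEP_TO_MODULE and resolve_step() are assumed
# names standing in for the registry dict shown in the diff above.
def resolve_step(step_name: str):
    """Map a configured step name to its workflow module."""
    try:
        return STEP_TO_MODULE[step_name]
    except KeyError:
        # e.g. a config still naming the removed "wgs_sv_calling" step
        raise ValueError(f"Unknown pipeline step: {step_name}") from None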
17 changes: 17 additions & 0 deletions snappy_pipeline/workflows/abstract/common.py
@@ -1,5 +1,7 @@
 """Commonly used code and types"""
 
+from itertools import chain
+import re
 import typing
 
 from snakemake.io import Wildcards
@@ -60,3 +62,18 @@ def get_resource_usage(self, action: str) -> ResourceUsage:
         assert self.resource_usage_dict is not None, "resource_usage_dict not set!"
         assert action in self.resource_usage_dict, f"No resource usage entry for {action}"
         return self.resource_usage_dict[action]
+
+
+def augment_work_dir_with_output_links(
+    work_dir_dict: SnakemakeDict, log_files: typing.Optional[typing.List[str]] = None
+) -> SnakemakeDict:
+    """Return a copy of ``work_dir_dict`` with an added ``"output_links"`` key.
+
+    Optionally, the output links also include the paths in ``log_files``.
+    """
+    result = dict(work_dir_dict)
+    result["output_links"] = [
+        re.sub(r"^work/", "output/", work_path)
+        for work_path in chain(work_dir_dict.values(), log_files or [])
+    ]
+    return result
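In effect, the helper mirrors every ``work/`` path under ``output/``. A quick usage sketch (mapper, library, and path names are made up for illustration):

work_files = {
    "vcf": "work/bwa.delly2.P001/out/bwa.delly2.P001.vcf.gz",
    "vcf_tbi": "work/bwa.delly2.P001/out/bwa.delly2.P001.vcf.gz.tbi",
}
log_files = ["work/bwa.delly2.P001/log/bwa.delly2.P001.log"]

result = augment_work_dir_with_output_links(work_files, log_files)
# result["output_links"] == [
#     "output/bwa.delly2.P001/out/bwa.delly2.P001.vcf.gz",
#     "output/bwa.delly2.P001/out/bwa.delly2.P001.vcf.gz.tbi",
#     "output/bwa.delly2.P001/log/bwa.delly2.P001.log",
# ]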
1 change: 1 addition & 0 deletions snappy_pipeline/workflows/common/__init__.py
"""Code shared between workflows"""
128 changes: 128 additions & 0 deletions snappy_pipeline/workflows/common/delly.py
"""Workflow step parts for Delly.

These are used in both ``sv_calling_targeted`` and ``sv_calling_wgs``.
"""

from snappy_pipeline.utils import dictify
from snappy_pipeline.workflows.abstract import BaseStepPart
from snappy_pipeline.workflows.abstract.common import (
ForwardResourceUsageMixin,
ForwardSnakemakeFilesMixin,
augment_work_dir_with_output_links,
)
from snappy_pipeline.workflows.common.sv_calling import (
SvCallingGetLogFileMixin,
SvCallingGetResultFilesMixin,
)
from snappy_wrappers.resource_usage import ResourceUsage


class Delly2StepPart(
SvCallingGetResultFilesMixin,
SvCallingGetLogFileMixin,
ForwardSnakemakeFilesMixin,
ForwardResourceUsageMixin,
BaseStepPart,
):
"""Perform SV calling on exomes using Delly2"""

name = "delly2"
actions = ("call", "merge_calls", "genotype", "merge_genotypes")

_cheap_resource_usage = ResourceUsage(
threads=2,
time="4-00:00:00",
memory=f"{7 * 1024 * 2}M",
)
_normal_resource_usage = ResourceUsage(
threads=2,
time="7-00:00:00", # 7 days
memory=f"{20 * 1024 * 2}M",
)
resource_usage_dict = {
"call": _normal_resource_usage,
"merge_calls": _cheap_resource_usage,
"genotype": _normal_resource_usage,
"merge_genotypes": _cheap_resource_usage,
}

def __init__(self, parent):
super().__init__(parent)

self.index_ngs_library_to_pedigree = {}
for sheet in self.parent.shortcut_sheets:
self.index_ngs_library_to_pedigree.update(sheet.index_ngs_library_to_pedigree)

self.donor_ngs_library_to_pedigree = {}
for sheet in self.parent.shortcut_sheets:
self.donor_ngs_library_to_pedigree.update(sheet.donor_ngs_library_to_pedigree)

@dictify
def _get_input_files_call(self, wildcards):
ngs_mapping = self.parent.sub_workflows["ngs_mapping"]
token = f"{wildcards.mapper}.{wildcards.library_name}"
yield "bam", ngs_mapping(f"output/{token}/out/{token}.bam")

@dictify
def _get_output_files_call(self):
infix = "{mapper}.delly2_call.{library_name}"
yield "bcf", f"work/{infix}/out/{infix}.bcf"
yield "bcf_md5", f"work/{infix}/out/{infix}.bcf.md5"
yield "bcf_csi", f"work/{infix}/out/{infix}.bcf.csi"
yield "bcf_csi_md5", f"work/{infix}/out/{infix}.bcf.csi.md5"

@dictify
def _get_input_files_merge_calls(self, wildcards):
bcfs = []
pedigree = self.index_ngs_library_to_pedigree[wildcards.library_name]
for donor in pedigree.donors:
if donor.dna_ngs_library:
infix = f"{wildcards.mapper}.delly2_call.{donor.dna_ngs_library.name}"
bcfs.append(f"work/{infix}/out/{infix}.bcf")
yield "bcf", bcfs

@dictify
def _get_output_files_merge_calls(self):
infix = "{mapper}.delly2_merge_calls.{library_name}"
yield "bcf", f"work/{infix}/out/{infix}.bcf"
yield "bcf_md5", f"work/{infix}/out/{infix}.bcf.md5"
yield "bcf_csi", f"work/{infix}/out/{infix}.bcf.csi"
yield "bcf_csi_md5", f"work/{infix}/out/{infix}.bcf.csi.md5"

@dictify
def _get_input_files_genotype(self, wildcards):
yield from self._get_input_files_call(wildcards).items()
pedigree = self.donor_ngs_library_to_pedigree[wildcards.library_name]
infix = f"{wildcards.mapper}.delly2_merge_calls.{pedigree.index.dna_ngs_library.name}"
yield "bcf", f"work/{infix}/out/{infix}.bcf"

@dictify
def _get_output_files_genotype(self):
infix = "{mapper}.delly2_genotype.{library_name}"
yield "bcf", f"work/{infix}/out/{infix}.bcf"
yield "bcf_md5", f"work/{infix}/out/{infix}.bcf.md5"
yield "bcf_csi", f"work/{infix}/out/{infix}.bcf.csi"
yield "bcf_csi_md5", f"work/{infix}/out/{infix}.bcf.csi.md5"

@dictify
def _get_input_files_merge_genotypes(self, wildcards):
bcfs = []
pedigree = self.index_ngs_library_to_pedigree[wildcards.library_name]
for donor in pedigree.donors:
if donor.dna_ngs_library:
infix = f"{wildcards.mapper}.delly2_genotype.{donor.dna_ngs_library.name}"
bcfs.append(f"work/{infix}/out/{infix}.bcf")
yield "bcf", bcfs

@dictify
def _get_output_files_merge_genotypes(self):
infix = "{mapper}.delly2.{library_name}"
work_files = {
"vcf": f"work/{infix}/out/{infix}.vcf.gz",
"vcf_md5": f"work/{infix}/out/{infix}.vcf.gz.md5",
"vcf_tbi": f"work/{infix}/out/{infix}.vcf.gz.tbi",
"vcf_tbi_md5": f"work/{infix}/out/{infix}.vcf.gz.tbi.md5",
}
yield from augment_work_dir_with_output_links(
work_files, self.get_log_file("merge_genotypes").values()
).items()
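Read together, the four actions implement Delly2's per-pedigree workflow: per-sample calling, pedigree-wide merging of call sites, per-sample re-genotyping against the merged sites, and a final merge that produces the released VCF. For a made-up example with mapper ``bwa``, index library ``P001-N1-DNA1-WGS1``, and a second donor ``P002-N1-DNA1-WGS1``, the infix patterns above expand to:

# "call": one BCF per donor with a DNA library
work/bwa.delly2_call.P001-N1-DNA1-WGS1/out/bwa.delly2_call.P001-N1-DNA1-WGS1.bcf
work/bwa.delly2_call.P002-N1-DNA1-WGS1/out/bwa.delly2_call.P002-N1-DNA1-WGS1.bcf
# "merge_calls": one site list per pedigree, keyed by the index library
work/bwa.delly2_merge_calls.P001-N1-DNA1-WGS1/out/bwa.delly2_merge_calls.P001-N1-DNA1-WGS1.bcf
# "genotype": each donor re-genotyped against the merged sites
work/bwa.delly2_genotype.P001-N1-DNA1-WGS1/out/bwa.delly2_genotype.P001-N1-DNA1-WGS1.bcf
work/bwa.delly2_genotype.P002-N1-DNA1-WGS1/out/bwa.delly2_genotype.P002-N1-DNA1-WGS1.bcf
# "merge_genotypes": final VCF, mirrored to output/ via augment_work_dir_with_output_links()
work/bwa.delly2.P001-N1-DNA1-WGS1/out/bwa.delly2.P001-N1-DNA1-WGS1.vcf.gz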
@@ -7,8 +7,8 @@
 
 - ``helper_gcnv_build_model_wgs``
 - ``helper_gcnv_build_model_targeted_seq``
-- ``wgs_cnv_calling``
 - ``sv_calling_targeted``
+- ``sv_calling_wgs``
 
 We only implement calling in CASE mode, COHORT mode is only used for building the background
 model. However, note that we run the CASE mode on all samples from a given sheet. This may
@@ -5,7 +5,7 @@
 from snakemake.io import expand, touch
 
 from snappy_pipeline.utils import dictify, listify
-from snappy_pipeline.workflows.gcnv.gcnv_common import GcnvCommonStepPart
+from snappy_pipeline.workflows.common.gcnv.gcnv_common import GcnvCommonStepPart
 
 
 class AnnotateGcMixin:
@@ -13,7 +13,7 @@
 
 from snappy_pipeline.base import InvalidConfiguration, UnsupportedActionException
 from snappy_pipeline.utils import dictify, flatten, listify
-from snappy_pipeline.workflows.gcnv.gcnv_common import (
+from snappy_pipeline.workflows.common.gcnv.gcnv_common import (
     GcnvCommonStepPart,
     InconsistentLibraryKitsWarning,
 )
68 changes: 68 additions & 0 deletions snappy_pipeline/workflows/common/manta.py
"""Workflow step parts for Manta.

These are used in both ``sv_calling_targeted`` and ``sv_calling_wgs``.
"""

from snappy_pipeline.utils import dictify
from snappy_pipeline.workflows.abstract import BaseStepPart
from snappy_pipeline.workflows.abstract.common import (
ForwardResourceUsageMixin,
ForwardSnakemakeFilesMixin,
augment_work_dir_with_output_links,
)
from snappy_pipeline.workflows.common.sv_calling import (
SvCallingGetLogFileMixin,
SvCallingGetResultFilesMixin,
)
from snappy_wrappers.resource_usage import ResourceUsage


class MantaStepPart(
SvCallingGetLogFileMixin,
SvCallingGetResultFilesMixin,
ForwardSnakemakeFilesMixin,
ForwardResourceUsageMixin,
BaseStepPart,
):
"""Perform SV calling on exomes using Manta"""

name = "manta"
actions = ("run",)

resource_usage_dict = {
"run": ResourceUsage(
threads=16,
time="2-00:00:00",
memory=f"{int(3.75 * 1024 * 16)}M",
)
}

def __init__(self, parent):
super().__init__(parent)
#: Shortcuts from index NGS library name to Pedigree
self.index_ngs_library_to_pedigree = {}
for sheet in self.parent.shortcut_sheets:
self.index_ngs_library_to_pedigree.update(sheet.index_ngs_library_to_pedigree)

@dictify
def _get_input_files_run(self, wildcards):
ngs_mapping = self.parent.sub_workflows["ngs_mapping"]
bams = []
for donor in self.index_ngs_library_to_pedigree[wildcards.library_name].donors:
if donor.dna_ngs_library:
token = f"{wildcards.mapper}.{donor.dna_ngs_library.name}"
bams.append(ngs_mapping(f"output/{token}/out/{token}.bam"))
yield "bam", bams

@dictify
def _get_output_files_run(self):
infix = "{mapper}.manta.{library_name}"
work_files = {
"vcf": f"work/{infix}/out/{infix}.vcf.gz",
"vcf_md5": f"work/{infix}/out/{infix}.vcf.gz.md5",
"vcf_tbi": f"work/{infix}/out/{infix}.vcf.gz.tbi",
"vcf_tbi_md5": f"work/{infix}/out/{infix}.vcf.gz.tbi.md5",
}
yield from augment_work_dir_with_output_links(
work_files, self.get_log_file().values()
).items()
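The memory string appears to budget total RAM as threads times a per-thread allowance (3.75 GiB per thread here). A worked check of the arithmetic (illustration only, not project code):

threads = 16
gib_per_thread = 3.75
total_mb = int(gib_per_thread * 1024 * threads)
assert total_mb == 61440  # -> "61440M", the string passed to ResourceUsage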