Skip to content

Commit

Permalink
feat: annotating SV coverage with varfish-annotator (#371)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Feb 9, 2023
1 parent 14d86e3 commit e9fb880
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 9 deletions.
4 changes: 4 additions & 0 deletions snappy_pipeline/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
__author__ = "Manuel Holtgrewe <manuel.holtgrewe@bih-charite.de>"


class SkipLibraryWarning(UserWarning):
"""Raised when libraries are skipped."""


class InvalidConfiguration(Exception):
"""Raised on invalid configuration"""

Expand Down
4 changes: 2 additions & 2 deletions snappy_pipeline/workflows/varfish_export/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ rule varfish_export_varfish_annotator_annotate_svs:
tmpdir=wf.get_resource("varfish_annotator", "annotate_svs", "tmpdir"),
log:
**wf.get_log_file("varfish_annotator", "annotate_svs"),
# params:
# **{"args": wf.get_params("varfish_annotator", "annotate_svs")},
params:
**{"args": wf.get_params("varfish_annotator", "annotate_svs")},
wrapper:
wf.wrapper_path("varfish_annotator/annotate_svs")

Expand Down
70 changes: 63 additions & 7 deletions snappy_pipeline/workflows/varfish_export/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
from matplotlib.cbook import flatten
from snakemake.io import Wildcards, expand

from snappy_pipeline.base import SkipLibraryWarning
from snappy_pipeline.utils import DictQuery, dictify, listify
from snappy_pipeline.workflows.abstract import (
BaseStep,
Expand All @@ -73,6 +74,7 @@
)
from snappy_pipeline.workflows.abstract.common import SnakemakeDict, SnakemakeDictItemsGenerator
from snappy_pipeline.workflows.abstract.warnings import InconsistentPedigreeWarning
from snappy_pipeline.workflows.common.gcnv.gcnv_common import InconsistentLibraryKitsWarning
from snappy_pipeline.workflows.ngs_mapping import NgsMappingWorkflow
from snappy_pipeline.workflows.sv_calling_targeted import SvCallingTargetedWorkflow
from snappy_pipeline.workflows.variant_calling import (
Expand Down Expand Up @@ -210,10 +212,6 @@ def get_result_files(self, action):
"index_ngs_library": list(index_ngs_libraries.keys()),
"mapper": [self.parent.config["tools_ngs_mapping"][0]],
}
# if action == "annotate":
# kwargs["var_caller"] = self.parent.config["tools_variant_calling"]
# elif action == "annotate_svs" and self.parent.config["path_sv_calling_targeted"]:
# kwargs["sv_caller"] = self.parent.config["tools_sv_calling_targeted"]
for path_tpl in path_tpls:
yield from expand(path_tpl, **kwargs)

Expand Down Expand Up @@ -311,19 +309,60 @@ def _get_input_files_annotate_svs(self, wildcards):
if self.parent.config["path_sv_calling_targeted"]:
sv_calling = self.parent.sub_workflows["sv_calling_targeted"]
sv_callers = self.parent.config["tools_sv_calling_targeted"]
elif self.parent.config["sv_calling_wgs"]:
skip_libraries = {
sv_caller: self.parent.w_config["step_config"]["sv_calling_targeted"]
.get(sv_caller, {})
.get("skip_libraries", [])
for sv_caller in sv_callers
}
elif self.parent.config["path_sv_calling_wgs"]:
sv_calling = self.parent.sub_workflows["sv_calling_wgs"]
sv_callers = self.parent.config["tools_sv_calling_wgs"]
sv_callers = self.parent.config["tools_sv_calling_wgs"]["dna"]
skip_libraries = {
sv_caller: self.parent.w_config["step_config"]["sv_calling_wgs"]
.get(sv_caller, {})
.get("skip_libraries", [])
for sv_caller in sv_callers
}
else:
raise RuntimeError("Neither targeted nor WGS SV calling configured")

pedigree = self.index_ngs_library_to_pedigree[wildcards.index_ngs_library]
library_names = [
donor.dna_ngs_library.name for donor in pedigree.donors if donor.dna_ngs_library
]

path = (
"output/{mapper}.{sv_caller}.{index_ngs_library}/"
"out/{mapper}.{sv_caller}.{index_ngs_library}.vcf.gz"
)

vcfs = []
for sv_caller in sv_callers:
if any(map(skip_libraries[sv_caller].__contains__, library_names)):
msg = (
f"Found libraries to skip in family {library_names}. All samples will be skipped "
f"for {sv_caller}."
)
warnings.warn(SkipLibraryWarning(msg))
continue

if sv_caller == "gcnv":
library_kits = [
self.parent.ngs_library_to_kit.get(library_name, "__default__")
for library_name in library_names
]
if len(set(library_kits)) != 1:
names_kits = list(zip(library_names, library_kits))
msg = (
"Found inconsistent library kits (more than one kit!) for pedigree with "
f"index {wildcards.index_ngs_library}. The library name/kit pairs are "
f"{names_kits}. This pedigree will be SKIPPED for gcnv export (as it was "
"skipped for gcnv calls)."
)
warnings.warn(InconsistentLibraryKitsWarning(msg))
continue

vcfs.append(
sv_calling(path).format(
mapper=wildcards.mapper,
Expand All @@ -333,6 +372,20 @@ def _get_input_files_annotate_svs(self, wildcards):
)
yield "vcf", vcfs

ngs_mapping = self.parent.sub_workflows["ngs_mapping"]
pedigree = self.index_ngs_library_to_pedigree[wildcards.index_ngs_library]
library_names = [
donor.dna_ngs_library.name for donor in pedigree.donors if donor.dna_ngs_library
]
path_vcf_cov = (
"output/{mapper}.{library_name}/report/cov/{mapper}.{library_name}.cov.vcf.gz"
)
cov_vcfs = [
ngs_mapping(path_vcf_cov.format(mapper=wildcards.mapper, library_name=library_name))
for library_name in library_names
]
yield "vcf_cov", cov_vcfs

@dictify
def _get_output_files_annotate_svs(self):
prefix = (
Expand All @@ -354,6 +407,9 @@ def _get_output_files_annotate_svs(self):
for work_path in chain(work_paths.values(), self.get_log_file("annotate_svs").values())
]

#: Alias the get params function.
_get_params_annotate_svs = _get_params_annotate

@dictify
def _get_input_files_bam_qc(self, wildcards):
ngs_mapping = self.parent.sub_workflows["ngs_mapping"]
Expand Down Expand Up @@ -457,7 +513,7 @@ def __init__(self, workflow, config, config_lookup_paths, config_paths, workdir)
"sv_calling_targeted", self.config["path_sv_calling_targeted"]
)
if self.config["path_sv_calling_wgs"]:
self.register_sub_workflow("sv_calling_targeted", self.config["path_sv_calling_wgs"])
self.register_sub_workflow("sv_calling_wgs", self.config["path_sv_calling_wgs"])
self.register_sub_workflow("ngs_mapping", self.config["path_ngs_mapping"])

# Copy over "tools" setting from variant_calling/ngs_mapping if not set here
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

__author__ = "Manuel Holtgrewe <manuel.holtgrewe@bih-charite.de>"

# Optionally get path to coverage VCF file.
coverage_vcf = " ".join(getattr(snakemake.input, "vcf_cov", []))

# Get shortcut to configuration of varfish_export step
step_name = snakemake.params.args["step_name"]
export_config = snakemake.config["step_config"][step_name]
Expand Down Expand Up @@ -100,6 +103,12 @@
\
--release {export_config[release]} \
\
$(if [[ "{coverage_vcf}" != "" ]]; then \
for path in {coverage_vcf}; do \
echo --coverage-vcf $path; \
done; \
fi) \
\
--db-path {export_config[path_db]} \
--refseq-ser-path {export_config[path_refseq_ser]} \
--ensembl-ser-path {export_config[path_ensembl_ser]} \
Expand Down

0 comments on commit e9fb880

Please sign in to comment.