Skip to content

Commit

Permalink
feat: support for somatic variant calling without normal (#503)
Browse files Browse the repository at this point in the history
  • Loading branch information
ericblanc20 authored May 6, 2024
1 parent d7420f7 commit 66f555c
Show file tree
Hide file tree
Showing 14 changed files with 678 additions and 555 deletions.
1 change: 1 addition & 0 deletions .tests/test-workflow/config/cancer_wes/config.yaml.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ step_config:
somatic_variant_filtration:
path_somatic_variant: ../somatic_variant_annotation
path_ngs_mapping: ../ngs_mapping
filtration_schema: list
filter_list:
- dkfz: {}
- bcftools:
Expand Down
8 changes: 8 additions & 0 deletions docs/step_generic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,11 @@ The versioning allows the pipeline step to check whether there are incompatibili

The execution of ``cubi-snake`` in a directory will not automatically generate these files.
Rather, they are only generated when used in a pipeline step such as ``somatic_targeted_cnv_calling``.

.. note:: **Debugging configuration errors**

The configuration for a complete pipeline can be long, and copy/paste errors can creep in.
When facing errors in configuration, it is advisable to

- Use absolute paths for files not part of the workflow (panel of normal files are **NOT** part the of workflow), and
- Use the ``snappy-snake --verbose``, which prints the step configuration (and all the dependend workflows) as ``snappy`` has understood it.
11 changes: 11 additions & 0 deletions snappy_pipeline/workflows/abstract/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,6 +671,9 @@ def __init__(
#: Functions from sub workflows, can be used to generate output paths into these workflows
self.sub_workflows = {}

if workflow.verbose:
self._write_step_config()

def _setup_hooks(self):
"""Setup Snakemake workflow hooks for start/end/error"""
# In the following, the "log" parameter to the handler functions is set to "_" as we
Expand Down Expand Up @@ -931,6 +934,14 @@ def _load_data_search_infos(self):
mixed_se_pe=False,
)

def _write_step_config(self, f=sys.stdout):
print(f"\n\n----- Configuration for step {self.name}:\n", file=f)
yaml = ruamel_yaml.YAML()
yaml.preserve_quotes = True
yaml.indent(sequence=4, mapping=4, offset=4)
yaml.dump(self.config, f)
print(f"\n------ Configuration for {self.name} ends here\n", file=f)

@classmethod
def wrapper_path(cls, path):
"""Generate path to wrapper"""
Expand Down
31 changes: 15 additions & 16 deletions snappy_pipeline/workflows/somatic_variant_annotation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,8 @@ def params_function(wildcards):

def get_normal_lib_name(self, wildcards):
"""Return name of normal (non-cancer) library"""
pair = self.tumor_ngs_library_to_sample_pair[wildcards.tumor_library]
return pair.normal_sample.dna_ngs_library.name
pair = self.tumor_ngs_library_to_sample_pair.get(wildcards.tumor_library, None)
return pair.normal_sample.dna_ngs_library.name if pair else None


class JannovarAnnotateSomaticVcfStepPart(AnnotateSomaticVcfStepPart):
Expand Down Expand Up @@ -426,20 +426,19 @@ def _yield_result_files_matched(self, tpl, **kwargs):
Mutect.
"""
for sheet in filter(is_not_background, self.shortcut_sheets):
for sample_pair in sheet.all_sample_pairs:
if (
not sample_pair.tumor_sample.dna_ngs_library
or not sample_pair.normal_sample.dna_ngs_library
):
msg = (
"INFO: sample pair for cancer bio sample {} has is missing primary"
"normal or primary cancer NGS library"
)
print(msg.format(sample_pair.tumor_sample.name), file=sys.stderr)
continue
yield from expand(
tpl, tumor_library=[sample_pair.tumor_sample.dna_ngs_library], **kwargs
)
for bio_entity in sheet.sheet.bio_entities.values():
for bio_sample in bio_entity.bio_samples.values():
if not bio_sample.extra_infos.get("isTumor", False):
continue
for test_sample in bio_sample.test_samples.values():
extraction_type = test_sample.extra_infos.get("extractionType", "unknown")
if extraction_type.lower() != "dna":
if extraction_type == "unknown":
msg = "INFO: sample {} has missing extraction type, ignored"
print(msg.format(test_sample.name), file=sys.stderr)
continue
for ngs_library in test_sample.ngs_libraries.values():
yield from expand(tpl, tumor_library=[ngs_library], **kwargs)

def _yield_result_files_joint(self, tpl, **kwargs):
"""Build output paths from path template and extension list.
Expand Down
92 changes: 56 additions & 36 deletions snappy_pipeline/workflows/somatic_variant_calling/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@
germline_resource: '' # Germline variants resource (same as panel of normals)
common_variants: '' # Common germline variants for contamination estimation
extra_arguments: [] # List additional Mutect2 arguments
# Each additional argument xust be in the form:
# Each additional argument must be in the form:
# "--<argument name> <argument value>"
# For example, to filter reads prior to calling & to
# add annotations to the output vcf:
Expand Down Expand Up @@ -387,32 +387,41 @@ def input_function(wildcards):
ngs_mapping = self.parent.sub_workflows["ngs_mapping"]
# Get names of primary libraries of the selected cancer bio sample and the
# corresponding primary normal sample
normal_base_path = (
"output/{mapper}.{normal_library}/out/{mapper}.{normal_library}".format(
normal_library=self.get_normal_lib_name(wildcards), **wildcards
)
)
tumor_base_path = (
"output/{mapper}.{tumor_library}/out/" "{mapper}.{tumor_library}"
).format(**wildcards)
return {
"normal_bam": ngs_mapping(normal_base_path + ".bam"),
"normal_bai": ngs_mapping(normal_base_path + ".bam.bai"),
input_files = {
"tumor_bam": ngs_mapping(tumor_base_path + ".bam"),
"tumor_bai": ngs_mapping(tumor_base_path + ".bam.bai"),
}

normal_library = self.get_normal_lib_name(wildcards)
if normal_library:
normal_base_path = (
"output/{mapper}.{normal_library}/out/{mapper}.{normal_library}".format(
normal_library=normal_library, **wildcards
)
)
input_files.update(
{
"normal_bam": ngs_mapping(normal_base_path + ".bam"),
"normal_bai": ngs_mapping(normal_base_path + ".bam.bai"),
}
)

return input_files

return input_function

def get_normal_lib_name(self, wildcards):
"""Return name of normal (non-cancer) library"""
pair = self.tumor_ngs_library_to_sample_pair[wildcards.tumor_library]
return pair.normal_sample.dna_ngs_library.name
pair = self.tumor_ngs_library_to_sample_pair.get(wildcards.tumor_library, None)
return pair.normal_sample.dna_ngs_library.name if pair else None

def get_tumor_lib_name(self, wildcards):
"""Return name of tumor library"""
pair = self.tumor_ngs_library_to_sample_pair[wildcards.tumor_library]
return pair.tumor_sample.dna_ngs_library.name
pair = self.tumor_ngs_library_to_sample_pair.get(wildcards.tumor_library, None)
return pair.tumor_sample.dna_ngs_library.name if pair else wildcards.tumor_library

def get_output_files(self, action):
"""Return output files that all somatic variant calling sub steps must
Expand Down Expand Up @@ -577,19 +586,30 @@ def _get_input_files_run(self, wildcards):
ngs_mapping = self.parent.sub_workflows["ngs_mapping"]
# Get names of primary libraries of the selected cancer bio sample and the
# corresponding primary normal sample
normal_base_path = "output/{mapper}.{normal_library}/out/{mapper}.{normal_library}".format(
normal_library=self.get_normal_lib_name(wildcards), **wildcards
)
tumor_base_path = (
"output/{mapper}.{tumor_library}/out/" "{mapper}.{tumor_library}"
).format(**wildcards)
return {
"normal_bam": ngs_mapping(normal_base_path + ".bam"),
"normal_bai": ngs_mapping(normal_base_path + ".bam.bai"),
input_files = {
"tumor_bam": ngs_mapping(tumor_base_path + ".bam"),
"tumor_bai": ngs_mapping(tumor_base_path + ".bam.bai"),
}

normal_library = self.get_normal_lib_name(wildcards)
if normal_library:
normal_base_path = (
"output/{mapper}.{normal_library}/out/{mapper}.{normal_library}".format(
normal_library=normal_library, **wildcards
)
)
input_files.update(
{
"normal_bam": ngs_mapping(normal_base_path + ".bam"),
"normal_bai": ngs_mapping(normal_base_path + ".bam.bai"),
}
)

return input_files

def _get_input_files_filter(self, wildcards):
"""Get input files for rule ``filter``.
Expand All @@ -609,9 +629,10 @@ def _get_input_files_filter(self, wildcards):
"stats": base_path + ".raw.vcf.stats",
"f1r2": base_path + ".raw.f1r2_tar.tar.gz",
}
if "contamination" in self.actions:
input_files["table"] = base_path + ".contamination.tbl"
input_files["segments"] = base_path + ".segments.tbl"
if self.get_normal_lib_name(wildcards):
if "contamination" in self.actions:
input_files["table"] = base_path + ".contamination.tbl"
input_files["segments"] = base_path + ".segments.tbl"
return input_files

def _get_input_files_pileup_normal(self, wildcards):
Expand Down Expand Up @@ -1196,20 +1217,19 @@ def _yield_result_files_matched(self, tpl, **kwargs):
Mutect.
"""
for sheet in filter(is_not_background, self.shortcut_sheets):
for sample_pair in sheet.all_sample_pairs:
if (
not sample_pair.tumor_sample.dna_ngs_library
or not sample_pair.normal_sample.dna_ngs_library
):
msg = (
"INFO: sample pair for cancer bio sample {} has is missing primary"
"normal or primary cancer NGS library"
)
print(msg.format(sample_pair.tumor_sample.name), file=sys.stderr)
continue
yield from expand(
tpl, tumor_library=[sample_pair.tumor_sample.dna_ngs_library], **kwargs
)
for bio_entity in sheet.sheet.bio_entities.values():
for bio_sample in bio_entity.bio_samples.values():
if not bio_sample.extra_infos.get("isTumor", False):
continue
for test_sample in bio_sample.test_samples.values():
extraction_type = test_sample.extra_infos.get("extractionType", "unknown")
if extraction_type.lower() != "dna":
if extraction_type == "unknown":
msg = "INFO: sample {} has missing extraction type, ignored"
print(msg.format(test_sample.name), file=sys.stderr)
continue
for ngs_library in test_sample.ngs_libraries.values():
yield from expand(tpl, tumor_library=[ngs_library], **kwargs)

def _yield_result_files_joint(self, tpl, **kwargs):
"""Build output paths from path template and extension list.
Expand Down
Loading

0 comments on commit 66f555c

Please sign in to comment.