bihealth · ericblanc20 · May 6, 2024 · Apr 24, 2024 · Apr 24, 2024 · Apr 24, 2024
diff --git a/snappy_pipeline/workflows/somatic_variant_annotation/__init__.py b/snappy_pipeline/workflows/somatic_variant_annotation/__init__.py
@@ -75,7 +75,6 @@
 
 from collections import OrderedDict
 import os
-import sys
 
 from biomedsheets.shortcuts import CancerCaseSheet, CancerCaseSheetOptions, is_not_background
 from snakemake.io import expand
@@ -238,8 +237,8 @@ def params_function(wildcards):
 
     def get_normal_lib_name(self, wildcards):
         """Return name of normal (non-cancer) library"""
-        pair = self.tumor_ngs_library_to_sample_pair[wildcards.tumor_library]
-        return pair.normal_sample.dna_ngs_library.name
+        pair = self.tumor_ngs_library_to_sample_pair.get(wildcards.tumor_library, None)
+        return pair.normal_sample.dna_ngs_library.name if pair else None
 
 
 class JannovarAnnotateSomaticVcfStepPart(AnnotateSomaticVcfStepPart):
@@ -426,20 +425,16 @@ def _yield_result_files_matched(self, tpl, **kwargs):
         Mutect.
         """
         for sheet in filter(is_not_background, self.shortcut_sheets):
-            for sample_pair in sheet.all_sample_pairs:
-                if (
-                    not sample_pair.tumor_sample.dna_ngs_library
-                    or not sample_pair.normal_sample.dna_ngs_library
-                ):
-                    msg = (
-                        "INFO: sample pair for cancer bio sample {} has is missing primary"
-                        "normal or primary cancer NGS library"
-                    )
-                    print(msg.format(sample_pair.tumor_sample.name), file=sys.stderr)
-                    continue
-                yield from expand(
-                    tpl, tumor_library=[sample_pair.tumor_sample.dna_ngs_library], **kwargs
-                )
+            for bio_entity in sheet.sheet.bio_entities.values():
+                for bio_sample in bio_entity.bio_samples.values():
+                    if not bio_sample.extra_infos.get("isTumor", False):
+                        continue
+                    for test_sample in bio_sample.test_samples.values():
+                        extraction_type = test_sample.extra_infos.get("extractionType", "unknown")
+                        if extraction_type.lower() != "dna":
+                            continue
+                        for ngs_library in test_sample.ngs_libraries.values():
+                            yield from expand(tpl, tumor_library=[ngs_library], **kwargs)
 
     def _yield_result_files_joint(self, tpl, **kwargs):
         """Build output paths from path template and extension list.

diff --git a/snappy_pipeline/workflows/somatic_variant_calling/__init__.py b/snappy_pipeline/workflows/somatic_variant_calling/__init__.py
@@ -101,7 +101,6 @@
 from collections import OrderedDict
 from itertools import chain
 import os
-import sys
 
 from biomedsheets.shortcuts import CancerCaseSheet, CancerCaseSheetOptions, is_not_background
 from snakemake.io import expand
@@ -241,7 +240,7 @@
       germline_resource: ''     # Germline variants resource (same as panel of normals)
       common_variants: ''       # Common germline variants for contamination estimation
       extra_arguments: []       # List additional Mutect2 arguments
-                                # Each additional argument xust be in the form:
+                                # Each additional argument must be in the form:
                                 # "--<argument name> <argument value>"
                                 # For example, to filter reads prior to calling & to
                                 # add annotations to the output vcf:
@@ -387,32 +386,39 @@ def input_function(wildcards):
             ngs_mapping = self.parent.sub_workflows["ngs_mapping"]
             # Get names of primary libraries of the selected cancer bio sample and the
             # corresponding primary normal sample
-            normal_base_path = (
-                "output/{mapper}.{normal_library}/out/{mapper}.{normal_library}".format(
-                    normal_library=self.get_normal_lib_name(wildcards), **wildcards
-                )
-            )
             tumor_base_path = (
                 "output/{mapper}.{tumor_library}/out/" "{mapper}.{tumor_library}"
             ).format(**wildcards)
-            return {
-                "normal_bam": ngs_mapping(normal_base_path + ".bam"),
-                "normal_bai": ngs_mapping(normal_base_path + ".bam.bai"),
-                "tumor_bam": ngs_mapping(tumor_base_path + ".bam"),
-                "tumor_bai": ngs_mapping(tumor_base_path + ".bam.bai"),
-            }
+            normal_library = self.get_normal_lib_name(wildcards)
+            if normal_library:
+                normal_base_path = (
+                    "output/{mapper}.{normal_library}/out/{mapper}.{normal_library}".format(
+                        normal_library=normal_library, **wildcards
+                    )
+                )
+                return {
+                    "normal_bam": ngs_mapping(normal_base_path + ".bam"),
+                    "normal_bai": ngs_mapping(normal_base_path + ".bam.bai"),
+                    "tumor_bam": ngs_mapping(tumor_base_path + ".bam"),
+                    "tumor_bai": ngs_mapping(tumor_base_path + ".bam.bai"),
+                }
+            else:
+                return {
+                    "tumor_bam": ngs_mapping(tumor_base_path + ".bam"),
+                    "tumor_bai": ngs_mapping(tumor_base_path + ".bam.bai"),
+                }
 
         return input_function
 
     def get_normal_lib_name(self, wildcards):
         """Return name of normal (non-cancer) library"""
-        pair = self.tumor_ngs_library_to_sample_pair[wildcards.tumor_library]
-        return pair.normal_sample.dna_ngs_library.name
+        pair = self.tumor_ngs_library_to_sample_pair.get(wildcards.tumor_library, None)
+        return pair.normal_sample.dna_ngs_library.name if pair else None
 
     def get_tumor_lib_name(self, wildcards):
         """Return name of tumor library"""
-        pair = self.tumor_ngs_library_to_sample_pair[wildcards.tumor_library]
-        return pair.tumor_sample.dna_ngs_library.name
+        pair = self.tumor_ngs_library_to_sample_pair.get(wildcards.tumor_library, None)
+        return pair.tumor_sample.dna_ngs_library.name if pair else wildcards.tumor_library
 
     def get_output_files(self, action):
         """Return output files that all somatic variant calling sub steps must
@@ -577,18 +583,27 @@ def _get_input_files_run(self, wildcards):
         ngs_mapping = self.parent.sub_workflows["ngs_mapping"]
         # Get names of primary libraries of the selected cancer bio sample and the
         # corresponding primary normal sample
-        normal_base_path = "output/{mapper}.{normal_library}/out/{mapper}.{normal_library}".format(
-            normal_library=self.get_normal_lib_name(wildcards), **wildcards
-        )
         tumor_base_path = (
             "output/{mapper}.{tumor_library}/out/" "{mapper}.{tumor_library}"
         ).format(**wildcards)
-        return {
-            "normal_bam": ngs_mapping(normal_base_path + ".bam"),
-            "normal_bai": ngs_mapping(normal_base_path + ".bam.bai"),
-            "tumor_bam": ngs_mapping(tumor_base_path + ".bam"),
-            "tumor_bai": ngs_mapping(tumor_base_path + ".bam.bai"),
-        }
+        normal_library = self.get_normal_lib_name(wildcards)
+        if normal_library:
+            normal_base_path = (
+                "output/{mapper}.{normal_library}/out/{mapper}.{normal_library}".format(
+                    normal_library=normal_library, **wildcards
+                )
+            )
+            return {
+                "normal_bam": ngs_mapping(normal_base_path + ".bam"),
+                "normal_bai": ngs_mapping(normal_base_path + ".bam.bai"),
+                "tumor_bam": ngs_mapping(tumor_base_path + ".bam"),
+                "tumor_bai": ngs_mapping(tumor_base_path + ".bam.bai"),
+            }
+        else:
+            return {
+                "tumor_bam": ngs_mapping(tumor_base_path + ".bam"),
+                "tumor_bai": ngs_mapping(tumor_base_path + ".bam.bai"),
+            }
 
     def _get_input_files_filter(self, wildcards):
         """Get input files for rule ``filter``.
@@ -609,9 +624,10 @@ def _get_input_files_filter(self, wildcards):
             "stats": base_path + ".raw.vcf.stats",
             "f1r2": base_path + ".raw.f1r2_tar.tar.gz",
         }
-        if "contamination" in self.actions:
-            input_files["table"] = base_path + ".contamination.tbl"
-            input_files["segments"] = base_path + ".segments.tbl"
+        if self.get_normal_lib_name(wildcards):
+            if "contamination" in self.actions:
+                input_files["table"] = base_path + ".contamination.tbl"
+                input_files["segments"] = base_path + ".segments.tbl"
         return input_files
 
     def _get_input_files_pileup_normal(self, wildcards):
@@ -1196,20 +1212,16 @@ def _yield_result_files_matched(self, tpl, **kwargs):
         Mutect.
         """
         for sheet in filter(is_not_background, self.shortcut_sheets):
-            for sample_pair in sheet.all_sample_pairs:
-                if (
-                    not sample_pair.tumor_sample.dna_ngs_library
-                    or not sample_pair.normal_sample.dna_ngs_library
-                ):
-                    msg = (
-                        "INFO: sample pair for cancer bio sample {} has is missing primary"
-                        "normal or primary cancer NGS library"
-                    )
-                    print(msg.format(sample_pair.tumor_sample.name), file=sys.stderr)
-                    continue
-                yield from expand(
-                    tpl, tumor_library=[sample_pair.tumor_sample.dna_ngs_library], **kwargs
-                )
+            for bio_entity in sheet.sheet.bio_entities.values():
+                for bio_sample in bio_entity.bio_samples.values():
+                    if not bio_sample.extra_infos.get("isTumor", False):
+                        continue
+                    for test_sample in bio_sample.test_samples.values():
+                        extraction_type = test_sample.extra_infos.get("extractionType", "unknown")
+                        if extraction_type.lower() != "dna":
+                            continue
+                        for ngs_library in test_sample.ngs_libraries.values():
+                            yield from expand(tpl, tumor_library=[ngs_library], **kwargs)
 
     def _yield_result_files_joint(self, tpl, **kwargs):
         """Build output paths from path template and extension list.