refactor!: rename targeted_seq_cnv_calling to sv_calling_targeted (#305) (#306)

* refactor!: rename targeted_seq_cnv_calling to sv_calling_targeted (#305)
* fixing bug with patching glob.glob too widely
bihealth · Dec 28, 2022 · e234160 · e234160
1 parent 3f36328
commit e234160
Show file tree

Hide file tree

Showing 22 changed files with 274 additions and 286 deletions.
diff --git a/docs/index.rst b/docs/index.rst
@@ -67,7 +67,7 @@ Project Info
     step/somatic_variant_filtration
     step/somatic_wgs_cnv_calling
     step/somatic_wgs_sv_calling
-    step/targeted_seq_cnv_calling
+    step/sv_calling_targeted
     step/targeted_seq_mei_calling
     step/targeted_seq_repeat_analysis
     step/tcell_crg_report

diff --git a/docs/step/targeted_seq_cnv_calling.rst → docs/step/sv_calling_targeted.rst b/docs/step/targeted_seq_cnv_calling.rst → docs/step/sv_calling_targeted.rst
@@ -1,7 +1,7 @@
-.. _step_targeted_seq_cnv_calling:
+.. _step_sv_calling_targeted:
 
 ==================================
 Germline Targeted Seq. CNV Calling
 ==================================
 
-.. automodule:: snappy_pipeline.workflows.targeted_seq_cnv_calling
+.. automodule:: snappy_pipeline.workflows.sv_calling_targeted
diff --git a/snappy_pipeline/apps/snappy_snake.py b/snappy_pipeline/apps/snappy_snake.py
@@ -47,8 +47,8 @@
     somatic_variant_signatures,
     somatic_wgs_cnv_calling,
     somatic_wgs_sv_calling,
+    sv_calling_targeted,
     targeted_seq_cnv_annotation,
-    targeted_seq_cnv_calling,
     targeted_seq_cnv_export,
     targeted_seq_mei_calling,
     tcell_crg_report,
@@ -117,7 +117,7 @@
     "somatic_wgs_cnv_calling": somatic_wgs_cnv_calling,
     "somatic_wgs_sv_calling": somatic_wgs_sv_calling,
     "targeted_seq_cnv_annotation": targeted_seq_cnv_annotation,
-    "targeted_seq_cnv_calling": targeted_seq_cnv_calling,
+    "sv_calling_targeted": sv_calling_targeted,
     "targeted_seq_cnv_export": targeted_seq_cnv_export,
     "targeted_seq_mei_calling": targeted_seq_mei_calling,
     "tcell_crg_report": tcell_crg_report,

diff --git a/snappy_pipeline/workflows/gcnv/__init__.py b/snappy_pipeline/workflows/gcnv/__init__.py
@@ -8,7 +8,7 @@
 - ``helper_gcnv_build_model_wgs``
 - ``helper_gcnv_build_model_targeted_seq``
 - ``wgs_cnv_calling``
-- ``targeted_seq_cnv_calling``
+- ``sv_calling_targeted``
 
 We only implement calling in CASE mode; COHORT mode is only used for building the background
 model.  However, note that we run the CASE mode on all samples from a given sheet.  This may

diff --git a/snappy_pipeline/workflows/gcnv/gcnv_run.py b/snappy_pipeline/workflows/gcnv/gcnv_run.py
@@ -2,7 +2,7 @@
 """Implementation of the gCNV CASE mode run methods.
 """
 
-import glob
+from glob import glob
 from itertools import chain
 import json
 import os
@@ -50,7 +50,7 @@ def get_model_dir_list(pattern):
 
     :return: Returns list with all directories that match the inputted pattern.
     """
-    return [path_ for path_ in glob.glob(pattern) if os.path.isdir(path_)]
+    return [path_ for path_ in glob(pattern) if os.path.isdir(path_)]
 
 
 def get_model_dir_to_dict(pattern):

diff --git a/...kflows/targeted_seq_cnv_calling/Snakefile → ...e/workflows/sv_calling_targeted/Snakefile b/...kflows/targeted_seq_cnv_calling/Snakefile → ...e/workflows/sv_calling_targeted/Snakefile
@@ -1,10 +1,10 @@
 # -*- coding: utf-8 -*-
-"""CUBI Pipeline targeted_seq_cnv_calling step Snakefile"""
+"""CUBI Pipeline sv_calling_targeted step Snakefile"""
 
 import os
 
 from snappy_pipeline import expand_ref
-from snappy_pipeline.workflows.targeted_seq_cnv_calling import TargetedSeqCnvCallingWorkflow
+from snappy_pipeline.workflows.sv_calling_targeted import SvCallingTargetedWorkflow
 
 __author__ = "Manuel Holtgrewe <manuel.holtgrewe@bihealth.de>"
 
@@ -20,7 +20,7 @@ config, lookup_paths, config_paths = expand_ref("config.yaml", config)
 
 # WorkflowImpl Object Setup ===================================================
 
-wf = TargetedSeqCnvCallingWorkflow(workflow, config, lookup_paths, config_paths, os.getcwd())
+wf = SvCallingTargetedWorkflow(workflow, config, lookup_paths, config_paths, os.getcwd())
 
 # Rules =======================================================================
 

diff --git a/...lows/targeted_seq_cnv_calling/__init__.py → ...workflows/sv_calling_targeted/__init__.py b/...lows/targeted_seq_cnv_calling/__init__.py → ...workflows/sv_calling_targeted/__init__.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-"""Implementation of the ``targeted_seq_cnv_calling`` step
+"""Implementation of the ``sv_calling_targeted`` step
 
 This step allows for the detection of CNV events for germline samples from targeted sequencing
 (e.g., exomes or large panels).  The wrapped tools start from the aligned reads (thus off
@@ -54,7 +54,7 @@
 
 The default configuration is as follows.
 
-.. include:: DEFAULT_CONFIG_targeted_seq_cnv_calling.rst
+.. include:: DEFAULT_CONFIG_sv_calling_targeted.rst
 
 =====================
 Available CNV Callers
@@ -99,11 +99,11 @@
 #: Minimum number of samples using kit - criteria to be analyzed
 MIN_KIT_SAMPLES = 10
 
-#: Default configuration for the targeted_seq_cnv_calling step
+#: Default configuration for the sv_calling_targeted step
 DEFAULT_CONFIG = r"""
-# Default configuration targeted_seq_cnv_calling
+# Default configuration sv_calling_targeted
 step_config:
-  targeted_seq_cnv_calling:
+  sv_calling_targeted:
     # Path to the ngs_mapping step.
     path_ngs_mapping: ../ngs_mapping
 
@@ -223,7 +223,7 @@ def get_params(wildcards):
 
     @dictify
     def _build_ngs_library_to_kit(self):
-        xhmm_config = DictQuery(self.w_config).get("step_config/targeted_seq_cnv_calling/xhmm")
+        xhmm_config = DictQuery(self.w_config).get("step_config/sv_calling_targeted/xhmm")
         if not xhmm_config["path_target_interval_list_mapping"]:
             # No mapping given, we will use the "default" one for all.
             for donor in self.parent.all_donors():
@@ -505,14 +505,14 @@ def get_log_file(action):
         if action == "coverage":
             return (
                 "work/{{mapper}}.xhmm_{action}.{{library_name}}/log/"
-                "snakemake.targeted_seq_cnv_calling.log"
+                "snakemake.sv_calling_targeted.log"
             ).format(action=action)
         elif action == "extract_ped":
-            return "work/{mapper}.xhmm.{library_name}/log/" "snakemake.targeted_seq_cnv_calling.log"
+            return "work/{mapper}.xhmm.{library_name}/log/" "snakemake.sv_calling_targeted.log"
         else:
             return (
                 "work/{{mapper}}.xhmm_{action}.{{library_kit}}/log/"
-                "snakemake.targeted_seq_cnv_calling.log"
+                "snakemake.sv_calling_targeted.log"
             ).format(action=action)
 
     def get_resource_usage(self, action):
@@ -541,7 +541,7 @@ def __init__(self, parent):
 
     @dictify
     def _build_ngs_library_to_kit(self):
-        gcnv_config = DictQuery(self.w_config).get("step_config/targeted_seq_cnv_calling/gcnv")
+        gcnv_config = DictQuery(self.w_config).get("step_config/sv_calling_targeted/gcnv")
         if not gcnv_config["path_target_interval_list_mapping"]:
             # No mapping given, we will use the "default" one for all.
             for donor in self.parent.all_donors():
@@ -563,11 +563,11 @@ def _build_ngs_library_to_kit(self):
         return result
 
 
-class TargetedSeqCnvCallingWorkflow(BaseStep):
+class SvCallingTargetedWorkflow(BaseStep):
     """Perform germline targeted sequencing CNV calling"""
 
     #: Workflow name
-    name = "targeted_seq_cnv_calling"
+    name = "sv_calling_targeted"
 
     sheet_shortcut_class = GermlineCaseSheet
 
@@ -702,6 +702,6 @@ def _yield_result_files(self, tpl, donors, **kwargs):
     def check_config(self):
         """Check that the necessary configuration is available for the step"""
         self.ensure_w_config(
-            config_keys=("step_config", "targeted_seq_cnv_calling", "path_ngs_mapping"),
+            config_keys=("step_config", "sv_calling_targeted", "path_ngs_mapping"),
             msg="Path to NGS mapping not configured but required for targeted seq. CNV calling",
         )
diff --git a/...eted_seq_cnv_calling/cnvetti_homdel.rules → .../sv_calling_targeted/cnvetti_homdel.rules b/...eted_seq_cnv_calling/cnvetti_homdel.rules → .../sv_calling_targeted/cnvetti_homdel.rules
@@ -1,7 +1,7 @@
 # CNVetti homdel needs many rules, thus they are in their own files.
 
 
-rule targeted_seq_cnv_calling_cnvetti_homdel_coverage:
+rule sv_calling_targeted_cnvetti_homdel_coverage:
     input:
         unpack(wf.get_input_files("cnvetti_homdel", "coverage")),
     output:
@@ -14,7 +14,7 @@ rule targeted_seq_cnv_calling_cnvetti_homdel_coverage:
         wf.wrapper_path("cnvetti/targeted_germline_hom/coverage")
 
 
-rule targeted_seq_cnv_calling_cnvetti_homdel_merge_neighbors:
+rule sv_calling_targeted_cnvetti_homdel_merge_neighbors:
     input:
         unpack(wf.get_input_files("cnvetti_homdel", "merge_neighbors")),
     output:
@@ -25,7 +25,7 @@ rule targeted_seq_cnv_calling_cnvetti_homdel_merge_neighbors:
         wf.wrapper_path("cnvetti/targeted_germline_hom/merge_neighbors")
 
 
-rule targeted_seq_cnv_calling_cnvetti_homdel_merge_cohort_vcfs:
+rule sv_calling_targeted_cnvetti_homdel_merge_cohort_vcfs:
     input:
         wf.get_input_files("cnvetti_homdel", "merge_cohort_vcfs"),
     output:
@@ -36,7 +36,7 @@ rule targeted_seq_cnv_calling_cnvetti_homdel_merge_cohort_vcfs:
         wf.wrapper_path("cnvetti/targeted_germline_hom/merge_cohort_vcfs")
 
 
-rule targeted_seq_cnv_calling_cnvetti_homdel_extract_ped:
+rule sv_calling_targeted_cnvetti_homdel_extract_ped:
     input:
         unpack(wf.get_input_files("cnvetti_homdel", "extract_ped")),
     output:

diff --git a/...flows/targeted_seq_cnv_calling/gcnv.rules → .../workflows/sv_calling_targeted/gcnv.rules b/...flows/targeted_seq_cnv_calling/gcnv.rules → .../workflows/sv_calling_targeted/gcnv.rules
@@ -1,7 +1,7 @@
 # gCNV with CASE MODE using precomputed model.
 
 
-rule targeted_seq_cnv_calling_gcnv_preprocess_intervals:
+rule sv_calling_targeted_gcnv_preprocess_intervals:
     input:
         unpack(wf.get_input_files("gcnv", "preprocess_intervals")),
     output:
@@ -17,7 +17,7 @@ rule targeted_seq_cnv_calling_gcnv_preprocess_intervals:
         wf.wrapper_path("gcnv/preprocess_intervals")
 
 
-rule targeted_seq_cnv_calling_gcnv_coverage:
+rule sv_calling_targeted_gcnv_coverage:
     input:
         unpack(wf.get_input_files("gcnv", "coverage")),
     output:
@@ -33,7 +33,7 @@ rule targeted_seq_cnv_calling_gcnv_coverage:
         wf.wrapper_path("gcnv/coverage")
 
 
-rule targeted_seq_cnv_calling_gcnv_contig_ploidy:
+rule sv_calling_targeted_gcnv_contig_ploidy:
     input:
         unpack(wf.get_input_files("gcnv", "contig_ploidy")),
     output:
@@ -51,7 +51,7 @@ rule targeted_seq_cnv_calling_gcnv_contig_ploidy:
         wf.wrapper_path("gcnv/contig_ploidy_case_mode")
 
 
-rule targeted_seq_cnv_calling_gcnv_call_cnvs:
+rule sv_calling_targeted_gcnv_call_cnvs:
     input:
         unpack(wf.get_input_files("gcnv", "call_cnvs")),
     output:
@@ -69,7 +69,7 @@ rule targeted_seq_cnv_calling_gcnv_call_cnvs:
         wf.wrapper_path("gcnv/call_cnvs_case_mode")
 
 
-rule targeted_seq_cnv_calling_gcnv_post_germline_calls:
+rule sv_calling_targeted_gcnv_post_germline_calls:
     input:
         unpack(wf.get_input_files("gcnv", "post_germline_calls")),
     output:
@@ -87,7 +87,7 @@ rule targeted_seq_cnv_calling_gcnv_post_germline_calls:
         wf.wrapper_path("gcnv/post_germline_calls_case_mode")
 
 
-rule targeted_seq_cnv_calling_gcnv_joint_germline_cnv_segmentation:
+rule sv_calling_targeted_gcnv_joint_germline_cnv_segmentation:
     input:
         unpack(wf.get_input_files("gcnv", "joint_germline_cnv_segmentation")),
     output:

diff --git a/...flows/targeted_seq_cnv_calling/xhmm.rules → .../workflows/sv_calling_targeted/xhmm.rules b/...flows/targeted_seq_cnv_calling/xhmm.rules → .../workflows/sv_calling_targeted/xhmm.rules
@@ -1,7 +1,7 @@
 # XHMM needs many rules, thus they are in their own files.
 
 
-rule targeted_seq_cnv_calling_xhmm_coverage:
+rule sv_calling_targeted_xhmm_coverage:
     input:
         unpack(wf.get_input_files("xhmm", "coverage")),
     output:
@@ -19,7 +19,7 @@ rule targeted_seq_cnv_calling_xhmm_coverage:
         wf.wrapper_path("xhmm/gatk_cov")
 
 
-rule targeted_seq_cnv_calling_xhmm_merge_cov:
+rule sv_calling_targeted_xhmm_merge_cov:
     input:
         wf.get_input_files("xhmm", "merge_cov"),
     output:
@@ -35,7 +35,7 @@ rule targeted_seq_cnv_calling_xhmm_merge_cov:
         wf.wrapper_path("xhmm/merge_cov")
 
 
-rule targeted_seq_cnv_calling_xhmm_ref_stats:
+rule sv_calling_targeted_xhmm_ref_stats:
     output:
         **wf.get_output_files("xhmm", "ref_stats"),
     threads: wf.get_resource("xhmm", "ref_stats", "threads")
@@ -49,7 +49,7 @@ rule targeted_seq_cnv_calling_xhmm_ref_stats:
         wf.wrapper_path("xhmm/gatk_ref_stats")
 
 
-rule targeted_seq_cnv_calling_xhmm_filter_center:
+rule sv_calling_targeted_xhmm_filter_center:
     input:
         unpack(wf.get_input_files("xhmm", "filter_center")),
     output:
@@ -65,7 +65,7 @@ rule targeted_seq_cnv_calling_xhmm_filter_center:
         wf.wrapper_path("xhmm/filter_center")
 
 
-rule targeted_seq_cnv_calling_xhmm_pca:
+rule sv_calling_targeted_xhmm_pca:
     input:
         wf.get_input_files("xhmm", "pca"),
     output:
@@ -81,7 +81,7 @@ rule targeted_seq_cnv_calling_xhmm_pca:
         wf.wrapper_path("xhmm/pca")
 
 
-rule targeted_seq_cnv_calling_xhmm_normalize:
+rule sv_calling_targeted_xhmm_normalize:
     input:
         unpack(wf.get_input_files("xhmm", "normalize")),
     output:
@@ -97,7 +97,7 @@ rule targeted_seq_cnv_calling_xhmm_normalize:
         wf.wrapper_path("xhmm/normalize")
 
 
-rule targeted_seq_cnv_calling_xhmm_zscore_center:
+rule sv_calling_targeted_xhmm_zscore_center:
     input:
         wf.get_input_files("xhmm", "zscore_center"),
     output:
@@ -113,7 +113,7 @@ rule targeted_seq_cnv_calling_xhmm_zscore_center:
         wf.wrapper_path("xhmm/zscore_center")
 
 
-rule targeted_seq_cnv_calling_xhmm_refilter:
+rule sv_calling_targeted_xhmm_refilter:
     input:
         unpack(wf.get_input_files("xhmm", "refilter")),
     output:
@@ -129,7 +129,7 @@ rule targeted_seq_cnv_calling_xhmm_refilter:
         wf.wrapper_path("xhmm/refilter")
 
 
-rule targeted_seq_cnv_calling_xhmm_discover:
+rule sv_calling_targeted_xhmm_discover:
     input:
         unpack(wf.get_input_files("xhmm", "discover")),
     output:
@@ -145,7 +145,7 @@ rule targeted_seq_cnv_calling_xhmm_discover:
         wf.wrapper_path("xhmm/discover")
 
 
-rule targeted_seq_cnv_calling_xhmm_genotype:
+rule sv_calling_targeted_xhmm_genotype:
     input:
         unpack(wf.get_input_files("xhmm", "genotype")),
     output:
@@ -161,7 +161,7 @@ rule targeted_seq_cnv_calling_xhmm_genotype:
         wf.wrapper_path("xhmm/genotype")
 
 
-rule targeted_seq_cnv_calling_xhmm_extract_ped:
+rule sv_calling_targeted_xhmm_extract_ped:
     input:
         unpack(wf.get_input_files("xhmm", "extract_ped")),
     output: