Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: enabled mbcs meta-tool in the ngs_mapping step model #545

Merged
merged 5 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions snappy_pipeline/workflows/hla_typing/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ class HlaTyping(SnappyStepModel, validators.ToolsMixin, validators.NgsMappingMix

tools: Annotated[list[Tool], EnumField(Tool, [Tool.optitype], min_length=1)]

optitype: Optitype | None = None
optitype: Optitype = Optitype()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Before I forget: This is a design decision we must make.

  • Either we always provide all the defaults, and substeps are always registered irrespective of whether they're used or not, or
  • we do not provide defaults but force None, in which case substeps must not be registered if the config doesn't provide the substeps and all functions in the workflow definitions (in the init.py files) must not access those non-existent config definitions (e.g. int get_output_files etc).

Copy link
Contributor Author

@ericblanc20 ericblanc20 Dec 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am tempted to always provide defaults. It makes the __init__.py code easier, and I personally don't like to see:

step_config:
   hla_typing:
    tools: [optitype]
    optitype: {}

For me, the syntax optitype: {} suggests that the tool doesn't accept parameters, not that we use the defaults.
Also, I wouldn't always set all defaults for more complex steps/tools. If there isn't an obvious choice for a default, then the user should make a decision.

But again, I am prepared to be convinced otherwise.


arcashla: ArcasHla | None = None
arcashla: ArcasHla = ArcasHla()
2 changes: 2 additions & 0 deletions snappy_pipeline/workflows/ngs_data_qc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ def _get_input_files_metrics(self, wildcards):

@dictify
def get_output_files(self, action):
if self.name not in self.config.tools:
return {}
Comment on lines +171 to +172
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This also touches on the comment above

if action == "prepare":
yield "baits", "work/static_data/picard/out/baits.interval_list"
yield "targets", "work/static_data/picard/out/targets.interval_list"
Expand Down
21 changes: 19 additions & 2 deletions snappy_pipeline/workflows/ngs_mapping/model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import enum
import itertools
import os
from enum import Enum
from typing import Annotated
Expand All @@ -21,8 +22,23 @@ class RnaMapper(Enum):
STAR = "star"


class MetaTool(Enum):
MBCS = "mbcs"


CombinedDnaTool = Enum(
"CombinedDnaTool",
{
(name, member.value)
for name, member in itertools.chain(
DnaMapper.__members__.items(), MetaTool.__members__.items()
)
},
)


class Tools(SnappyModel):
dna: Annotated[list[DnaMapper], EnumField(DnaMapper, [])]
dna: Annotated[list[CombinedDnaTool], EnumField(CombinedDnaTool, [])]
"""Required if DNA analysis; otherwise, leave empty."""

rna: Annotated[list[RnaMapper], EnumField(RnaMapper, [])]
Expand Down Expand Up @@ -261,6 +277,7 @@ class Minimap2(SnappyModel):

class Mbcs(SnappyModel):
mapping_tool: DnaMapper
barcode_tool: BarcodeTool
use_barcodes: bool
recalibrate: bool

Expand All @@ -287,7 +304,7 @@ class NgsMapping(SnappyStepModel):
bwa_mem2: BwaMem2 | None = None
"""Configuration for BWA-MEM2"""

somatic: Somatic | None = None
mbcs: Mbcs | None = None
"""
Configuration for somatic ngs_calling
(separate read groups, molecular barcodes & base quality recalibration)
Expand Down
2 changes: 1 addition & 1 deletion snappy_wrappers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

from snappy_pipeline.version import __version__
# from snappy_pipeline.version import __version__

__author__ = """Manuel Holtgrewe"""
__email__ = "manuel.holtgrewe@bih-charite.de"
Expand Down
2 changes: 1 addition & 1 deletion snappy_wrappers/wrappers/mbcs/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def pair_fastq_files(input_left, input_right):
input_left = snakemake.params.args["input"]["reads_left"]
input_right = snakemake.params.args["input"].get("reads_right", "")

config = snakemake.config["step_config"]["ngs_mapping"]["somatic"]
config = snakemake.config["step_config"]["ngs_mapping"]["mbcs"]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for now, we can do it like this, and then later move those config accesses to the parameters

mapper = config["mapping_tool"]
mapper_config = snakemake.config["step_config"]["ngs_mapping"][mapper]
if mapper == "bwa_mem2":
Expand Down
35 changes: 26 additions & 9 deletions tests/snappy_pipeline/workflows/test_workflows_ngs_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,26 @@ def minimal_config():
step_config:
ngs_mapping:
tools:
dna: ['bwa']
dna: ['mbcs']
target_coverage_report:
path_target_interval_list_mapping:
- pattern: "Agilent SureSelect Human All Exon V6.*"
name: Agilent_SureSelect_Human_All_Exon_V6
path: path/to/SureSelect_Human_All_Exon_V6_r2.bed
ngs_chew_fingerprint:
enabled: true
bwa:
path_index: /path/to/bwa/index.fasta.amb
bwa_mem2:
path_index: /path/to/bwa_mem2/index.fasta.amb
trim_adapters: false
mask_duplicates: true
num_threads_align: 16
num_threads_trimming: 8
num_threads_bam_view: 4
num_threads_bam_sort: 4
memory_bam_sort: 4G
split_as_secondary: false # -M flag
minimap2:
mapping_threads: 16
star:
Expand All @@ -51,7 +61,8 @@ def minimal_config():
out_filter_intron_motifs: ""
out_sam_strand_field: ""
mbcs:
mapping_tool: bwa
mapping_tool: bwa_mem2
barcode_tool: agent
use_barcodes: True
recalibrate: True
bqsr:
Expand All @@ -60,10 +71,16 @@ def minimal_config():
prepare:
path: /path/to/trimmer
lib_prep_type: v2
extra_args:
- "-polyG 8"
- "-minFractionRead 50"
mark_duplicates:
path: /path/to/creak
path_baits: /path/to/baits
consensus_mode: HYBRID
extra_args: []
input_filter_args: []
consensus_filter_args: []
bam_collect_doc:
enabled: true

Expand Down Expand Up @@ -712,18 +729,18 @@ def test_ngs_mapping_workflow_steps(ngs_mapping_workflow):

def test_ngs_mapping_workflow_files(ngs_mapping_workflow):
"""Tests simple functionality of the workflow: checks if file structure is created according
to the expected results from the tools, namely: bwa, external, link_in, link_out,
to the expected results from the tools, namely: mbcs, external, link_in, link_out,
link_out_bam, minimap2, star, target_coverage_report.
"""
# Check result file construction
expected = [
"output/bwa.P00{i}-N1-DNA1-WGS1/out/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format(i=i, ext=ext)
"output/mbcs.P00{i}-N1-DNA1-WGS1/out/mbcs.P00{i}-N1-DNA1-WGS1.{ext}".format(i=i, ext=ext)
for i in range(1, 7)
for ext in ("bam", "bam.bai", "bam.md5", "bam.bai.md5")
]
for infix in ("bam_collect_doc", "mapping", "target_cov_report", "ngs_chew_fingerprint"):
expected += [
"output/bwa.P00{i}-N1-DNA1-WGS1/log/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format(i=i, ext=ext)
"output/mbcs.P00{i}-N1-DNA1-WGS1/log/mbcs.P00{i}-N1-DNA1-WGS1.{ext}".format(i=i, ext=ext)
for i in range(1, 7)
for ext in (
f"{infix}.log",
Expand All @@ -739,7 +756,7 @@ def test_ngs_mapping_workflow_files(ngs_mapping_workflow):
)
]
bam_stats_text_out = (
"output/bwa.P00{i}-N1-DNA1-WGS1/report/bam_qc/bwa.P00{i}-N1-DNA1-WGS1.bam.{stats}.{ext}"
"output/mbcs.P00{i}-N1-DNA1-WGS1/report/bam_qc/mbcs.P00{i}-N1-DNA1-WGS1.bam.{stats}.{ext}"
)
expected += [
bam_stats_text_out.format(i=i, stats=stats, ext=ext)
Expand All @@ -748,7 +765,7 @@ def test_ngs_mapping_workflow_files(ngs_mapping_workflow):
for stats in ("bamstats", "flagstats", "idxstats")
]
expected += [
"output/bwa.P00{i}-N1-DNA1-WGS1/report/cov/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format(
"output/mbcs.P00{i}-N1-DNA1-WGS1/report/cov/mbcs.P00{i}-N1-DNA1-WGS1.{ext}".format(
i=i, ext=ext
)
for ext in (
Expand All @@ -764,14 +781,14 @@ def test_ngs_mapping_workflow_files(ngs_mapping_workflow):
for i in range(1, 7)
]
expected += [
"output/bwa.P00{i}-N1-DNA1-WGS1/report/fingerprint/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format(
"output/mbcs.P00{i}-N1-DNA1-WGS1/report/fingerprint/mbcs.P00{i}-N1-DNA1-WGS1.{ext}".format(
i=i, ext=ext
)
for ext in ("npz", "npz.md5")
for i in range(1, 7)
]
expected += [
"output/bwa.P00{i}-N1-DNA1-WGS1/report/alfred_qc/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format(
"output/mbcs.P00{i}-N1-DNA1-WGS1/report/alfred_qc/mbcs.P00{i}-N1-DNA1-WGS1.{ext}".format(
i=i, ext=ext
)
for ext in ("alfred.json.gz", "alfred.json.gz.md5")
Expand Down
Loading