Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: replacing custom coverage QC scripts with "alfred qc" (#481) #482

Merged
merged 10 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 1 addition & 21 deletions snappy_pipeline/workflows/ngs_mapping/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -203,27 +203,7 @@ rule ngs_mapping_target_coverage_report_run:
log:
**wf.get_log_file("target_coverage_report", "run"),
wrapper:
wf.wrapper_path("target_cov_report/run")


# Collect target region reports -----------------------------------------------


rule ngs_mapping_target_coverage_report_collect:
input:
wf.get_input_files("target_coverage_report", "collect"),
output:
**wf.get_output_files("target_coverage_report", "collect"),
threads: wf.get_resource("target_coverage_report", "run", "threads")
resources:
time=wf.get_resource("target_coverage_report", "run", "time"),
memory=wf.get_resource("target_coverage_report", "run", "memory"),
partition=wf.get_resource("target_coverage_report", "run", "partition"),
tmpdir=wf.get_resource("target_coverage_report", "run", "tmpdir"),
log:
**wf.get_log_file("target_coverage_report", "collect"),
wrapper:
wf.wrapper_path("target_cov_report/collect")
wf.wrapper_path("alfred/qc")


# Compute depth of coverage files (VCF and bigWig) ----------------------------
Expand Down
46 changes: 6 additions & 40 deletions snappy_pipeline/workflows/ngs_mapping/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,8 +341,7 @@
rna: [] # Required if RNA analysis; otherwise, leave empty. Example: 'star'.
dna_long: [] # Required if long-read mapper used; otherwise, leave empty. Example: 'minimap2'.
path_link_in: "" # OPTIONAL Override data set configuration search paths for FASTQ files
# Thresholds for targeted sequencing coverage QC. Enabled by specifying
# the path_arget_regions setting above
# Thresholds for targeted sequencing coverage QC.
target_coverage_report:
# Mapping from enrichment kit to target region BED file, for either computing per-target
# region coverage or selecting targeted exons.
Expand All @@ -354,11 +353,6 @@
# pattern: "xGen Exome Research Panel V1\\.0*"
# path: "path/to/targets.bed"
path_target_interval_list_mapping: []
# Maximal/minimal/warning coverage
max_coverage: 200
min_cov_warning: 20 # >= 20x for WARNING
min_cov_ok: 50 # >= 50x for OK
detailed_reporting: false # per-exon details (cannot go into multiqc)
# Depth of coverage collection, mainly useful for genomes.
bam_collect_doc:
enabled: false
Expand Down Expand Up @@ -1128,7 +1122,7 @@ def get_resource_usage(self, action):
)


class TargetCoverageReportStepPart(ReportGetResultFilesMixin, BaseStepPart):
class TargetCovReportStepPart(ReportGetResultFilesMixin, BaseStepPart):
"""Build target coverage report"""

#: Step name
Expand All @@ -1138,7 +1132,7 @@ class TargetCoverageReportStepPart(ReportGetResultFilesMixin, BaseStepPart):
tool_categories = ("dna", "dna_long")

#: Class available actions
actions = ("run", "collect")
actions = ("run",)

def __init__(self, parent):
super().__init__(parent)
Expand All @@ -1154,25 +1148,6 @@ def _get_input_files_run(self, wildcards):
yield "bam", f"work/{mapper_lib}/out/{mapper_lib}.bam"
yield "bai", f"work/{mapper_lib}/out/{mapper_lib}.bam.bai"

@listify
def _get_input_files_collect(self, wildcards):
_ = wildcards
for sheet in self.parent.shortcut_sheets:
for ngs_library in sheet.all_ngs_libraries:
extraction_type = ngs_library.test_sample.extra_infos.get("extractionType", "DNA")
if ngs_library.extra_infos["seqPlatform"] in ("ONP", "PacBio"):
suffix = "_long"
else:
suffix = ""
for mapper in self.config["tools"][extraction_type.lower() + suffix]:
if (
self.parent.default_kit_configured
or ngs_library.name in self.parent.ngs_library_to_kit
):
yield self._get_output_files_run_work()["txt"].format(
mapper=mapper, library_name=ngs_library.name
)

@dictify
def get_output_files(self, action):
"""Return output files"""
Expand All @@ -1193,13 +1168,8 @@ def get_output_files(self, action):

@dictify
def _get_output_files_run_work(self):
yield "txt", "work/{mapper}.{library_name}/report/cov_qc/{mapper}.{library_name}.txt"
yield "txt_md5", "work/{mapper}.{library_name}/report/cov_qc/{mapper}.{library_name}.txt.md5"

@dictify
def _get_output_files_collect_work(self):
yield "txt", "work/target_cov_report/out/target_cov_report.txt"
yield "txt_md5", "work/target_cov_report/out/target_cov_report.txt.md5"
yield "json", "work/{mapper}.{library_name}/report/alfred_qc/{mapper}.{library_name}.alfred.json.gz"
yield "json_md5", "work/{mapper}.{library_name}/report/alfred_qc/{mapper}.{library_name}.alfred.json.gz.md5"

@dictify
def get_log_file(self, action):
Expand Down Expand Up @@ -1235,10 +1205,6 @@ def _get_params_run(self, wildcards):

return {
"path_targets_bed": path_targets_bed,
"max_coverage": self.config["target_coverage_report"]["max_coverage"],
"min_cov_warning": self.config["target_coverage_report"]["min_cov_warning"],
"min_cov_ok": self.config["target_coverage_report"]["min_cov_ok"],
"detailed_reporting": self.config["target_coverage_report"]["detailed_reporting"],
}

def get_resource_usage(self, action):
Expand Down Expand Up @@ -1462,7 +1428,7 @@ def __init__(self, workflow, config, config_lookup_paths, config_paths, workdir)
Minimap2StepPart,
StarStepPart,
StrandednessStepPart,
TargetCoverageReportStepPart,
TargetCovReportStepPart,
BamCollectDocStepPart,
NgsChewStepPart,
)
Expand Down
8 changes: 4 additions & 4 deletions snappy_pipeline/workflows/varfish_export/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ def _get_input_files_bam_qc(self, wildcards):
# Get names of primary libraries of the selected pedigree. The pedigree is selected
# by the primary DNA NGS library of the index.
pedigree = self.index_ngs_library_to_pedigree[wildcards.index_ngs_library]
result = {"bamstats": [], "flagstats": [], "idxstats": [], "cov_qc": []}
result = {"bamstats": [], "flagstats": [], "idxstats": [], "alfred_qc": []}
for donor in pedigree.donors:
if not donor.dna_ngs_library:
continue
Expand All @@ -424,10 +424,10 @@ def _get_input_files_bam_qc(self, wildcards):
if donor.dna_ngs_library.name not in self.parent.ngs_library_to_kit:
continue
path = (
f"output/{wildcards.mapper}.{donor.dna_ngs_library.name}/report/cov_qc/"
f"{wildcards.mapper}.{donor.dna_ngs_library.name}.txt"
f"output/{wildcards.mapper}.{donor.dna_ngs_library.name}/report/alfred_qc/"
f"{wildcards.mapper}.{donor.dna_ngs_library.name}.alfred.json.gz"
)
result["cov_qc"].append(ngs_mapping(path))
result["alfred_qc"].append(ngs_mapping(path))
return result

@dictify
Expand Down
16 changes: 4 additions & 12 deletions snappy_pipeline/workflows/variant_export_external/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
ResourceUsage,
WritePedigreeSampleNameStepPart,
)
from snappy_pipeline.workflows.ngs_mapping import TargetCoverageReportStepPart
from snappy_pipeline.workflows.ngs_mapping import TargetCovReportStepPart

#: Default configuration for the variant_export_external step
DEFAULT_CONFIG = r"""
Expand All @@ -119,14 +119,10 @@
# region coverage or selecting targeted exons. Only used if 'bam_available_flag' is True.
# It will not generate detailed reporting.
path_targets_bed: OPTIONAL # OPTIONAL
# Maximal/minimal/warning coverage
max_coverage: 200
min_cov_warning: 20 # >= 20x for WARNING
min_cov_ok: 50 # >= 50x for OK
"""


class BamReportsExternalStepPart(TargetCoverageReportStepPart):
class BamReportsExternalStepPart(TargetCovReportStepPart):
"""Build target coverage report and QC report for external BAM files"""

#: Step name
Expand Down Expand Up @@ -166,7 +162,7 @@ def get_output_files(self, action):
def _get_input_files_collect(self, wildcards):
_ = wildcards
mapper_lib = "{mapper}.{library_name}"
yield f"work/{mapper_lib}/report/cov_qc/{mapper_lib}.txt"
yield f"work/{mapper_lib}/report/alfred_qc/{mapper_lib}.alfred.json.gz"

@dictify
def _get_output_files_bam_qc_work(self):
Expand Down Expand Up @@ -214,10 +210,6 @@ def _get_params_run(self, wildcards):
"bam": sorted(list(self._collect_bam_files(wildcards))),
"bam_count": len(sorted(list(self._collect_bam_files(wildcards)))),
"path_targets_bed": self.config["target_coverage_report"]["path_targets_bed"],
"max_coverage": self.config["target_coverage_report"]["max_coverage"],
"min_cov_warning": self.config["target_coverage_report"]["min_cov_warning"],
"min_cov_ok": self.config["target_coverage_report"]["min_cov_ok"],
"detailed_reporting": False,
}

def _get_params_bam_qc(self, wildcards):
Expand Down Expand Up @@ -335,7 +327,7 @@ def _get_input_files_bam_qc(self, wildcards):
result[key].append(tpl % key)
if donor.dna_ngs_library.name not in self.parent.ngs_library_list:
continue
path = f"work/{mapper}.{library_name}/report/cov_qc/{mapper}.{library_name}.txt"
path = f"work/{mapper}.{library_name}/report/alfred_qc/{mapper}.{library_name}.alfred.json.gz"
result["cov_qc"].append(path)

return result
Expand Down
Loading
Loading