Skip to content

Commit

Permalink
feat: make ngs_mapping sub steps generate links (#291) (#294)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Dec 23, 2022
1 parent efe6539 commit 6f2edd7
Show file tree
Hide file tree
Showing 27 changed files with 576 additions and 1,193 deletions.
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ def parse_requirements(path):
print("At least Python 3.7 is required.\n", file=sys.stderr)
sys.exit(1)

with open("README.rst") as readme_file:
with open("README.md") as readme_file:
readme = readme_file.read()

with open("HISTORY.rst") as history_file:
with open("CHANGELOG.md") as history_file:
history = history_file.read()

# Get requirements
Expand Down
15 changes: 15 additions & 0 deletions snappy_pipeline/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

__author__ = "Manuel Holtgrewe <manuel.holtgrewe@bih-charite.de>"

import typing


def listify(gen):
"""Decorator that converts a generator into a function which returns a list
Expand Down Expand Up @@ -44,6 +46,19 @@ def patched(*args, **kwargs):
return patched


def flatten(coll: typing.List[typing.Union[str, typing.List[str]]]) -> typing.Iterator[str]:
    """Flatten a collection of strings or (nested) lists of strings.

    This is a generator function: strings are yielded as-is, while any
    non-string iterable is flattened recursively.  The return annotation is
    ``Iterator[str]`` (not ``List[str]``) because calling this function
    produces a generator, not a list.

    Source: https://stackoverflow.com/a/17865033

    :param coll: collection of strings and/or (nested) lists of strings.
    :return: generator yielding the leaf strings in order.
    """
    for item in coll:
        if isinstance(item, typing.Iterable) and not isinstance(item, str):
            # Recurse into nested iterables; ``yield from`` replaces the
            # manual inner loop over the recursive call.
            yield from flatten(item)
        else:
            yield item


def try_or_none(func, exceptions):
"""Helper that tries to execute the function
Expand Down
2 changes: 1 addition & 1 deletion snappy_pipeline/workflows/abstract/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
"""Base classes for the actual pipeline steps"""

from collections import OrderedDict
from collections.abc import MutableMapping
from fnmatch import fnmatch
Expand Down Expand Up @@ -1086,7 +1087,6 @@ def get_input_files(self, action):
return [] # no input

def get_output_files(self, action):
"""Return output files that are generated by snappy-gatk_post_bam"""
assert action == "run", "Unsupported action"
return touch(self.base_pattern_out)

Expand Down
3 changes: 0 additions & 3 deletions snappy_pipeline/workflows/adapter_trimming/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,13 +585,10 @@ def input_function(wildcards):
"""Helper wrapper function"""
return expand(self.base_path_in.format(wildcards=wildcards), sub_dir=self.sub_dirs)

# Validate action
self._validate_action(action)
return input_function

def get_output_files(self, action):
"""Return output files that are generated by snappy-gatk_post_bam"""
# Validate action
self._validate_action(action)
return expand(self.base_path_out, sub_dir=self.sub_dirs)

Expand Down
9 changes: 5 additions & 4 deletions snappy_pipeline/workflows/gcnv/gcnv_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,12 +163,13 @@ def validate_precomputed_model_paths_config(self, config):
values_pass = all(isinstance(value, str) for value in model.values())
# Validate
if not (n_keys_pass and keys_pass and values_pass):
msg_tpl = (
pretty_model = self._pretty_print_config(config=model)
msg = (
"Provided configuration not as expected...\n"
"Expected:\n{e_}\nObserved:\n{o_}\n"
f"\nn_keys_pass={n_keys_pass}, keys_pass={keys_pass}, values_pass={values_pass}\n"
f"Expected:\n{expected_format}\nObserved:\n{pretty_model}\n"
)
pretty_model = self._pretty_print_config(config=model)
raise InvalidConfiguration(msg_tpl.format(e_=expected_format, o_=pretty_model))
raise InvalidConfiguration(msg)

def _pretty_print_config(self, config):
"""Pretty format configuration.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,9 @@ def get_result_files(self):
for tool in self.config["tools"]:
for sheet in filter(is_not_background, self.shortcut_sheets):
for ngs_library in sheet.all_ngs_libraries:
extraction_type = ngs_library.test_sample.extra_infos["extractionType"]
extraction_type = ngs_library.test_sample.extra_infos.get(
"extractionType", "DNA"
)
if extraction_type.lower() == "rna":
if tool == "salmon":
fns = expand(
Expand Down
6 changes: 4 additions & 2 deletions snappy_pipeline/workflows/hla_typing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def _collect_reads(self, wildcards, library_name, prefix):
def _get_seq_type(self, wildcards):
"""Return sequence type for the library name in wildcards"""
library = self.parent.ngs_library_name_to_ngs_library[wildcards.library_name]
return library.test_sample.extra_infos["extractionType"].lower()
return library.test_sample.extra_infos.get("extractionType", "DNA").lower()

def get_resource_usage(self, action):
"""Get Resource Usage
Expand Down Expand Up @@ -329,7 +329,9 @@ def _yield_result_files(self, tpl, **kwargs):
for ngs_library in sheet.all_ngs_libraries:
for tool in self.config["tools"]:
supported = self.sub_steps[tool].supported_extraction_types
extraction_type = ngs_library.test_sample.extra_infos["extractionType"].lower()
extraction_type = ngs_library.test_sample.extra_infos.get(
"extractionType", "DNA"
).lower()
if extraction_type in supported:
yield from expand(
tpl,
Expand Down
67 changes: 1 addition & 66 deletions snappy_pipeline/workflows/ngs_mapping/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ localrules:
# Linking the FASTQ files in and linking out the alignments should
# be done locally by the Snakemake master process
ngs_mapping_link_in_run,
ngs_mapping_link_out_bam_run,
ngs_mapping_link_out_run,


rule all:
Expand All @@ -52,30 +50,6 @@ rule ngs_mapping_link_in_run: # localrule
shell(wf.get_shell_cmd("link_in", "run", wildcards))


# Link out BAM files ----------------------------------------------------------


rule ngs_mapping_link_out_bam_run: # localrule
input:
wf.get_input_files("link_out_bam", "run"),
output:
wf.get_output_files("link_out_bam", "run"),
run:
shell(wf.get_shell_cmd("link_out_bam", "run", wildcards))


# Generic linking out ---------------------------------------------------------


rule ngs_mapping_link_out_run: # localrule
input:
wf.get_input_files("link_out", "run"),
output:
wf.get_output_files("link_out", "run"),
run:
shell(wf.get_shell_cmd("link_out", "run", wildcards))


# Alignment ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Run link in external bam files ----------------------------------------------
Expand Down Expand Up @@ -141,26 +115,6 @@ rule ngs_mapping_star_run:
wf.wrapper_path("star")


# GATK-based BAM postprocessing -----------------------------------------------

if wf.config["postprocessing"] == "gatk_post_bam":

rule ngs_mapping_gatk_post_bam_run:
input:
wf.get_input_files("gatk_post_bam", "run"),
output:
**wf.get_output_files("gatk_post_bam", "run"),
threads: wf.get_resource("gatk_post_bam", "run", "threads")
resources:
time=wf.get_resource("gatk_post_bam", "run", "time"),
memory=wf.get_resource("gatk_post_bam", "run", "memory"),
partition=wf.get_resource("gatk_post_bam", "run", "partition"),
log:
**wf.get_log_file("gatk_post_bam", "run"),
wrapper:
wf.wrapper_path("gatk_post_bam")


# Run minimap2 ---------------------------------------------------------------


Expand Down Expand Up @@ -217,30 +171,11 @@ rule ngs_mapping_target_coverage_report_collect:
memory=wf.get_resource("target_coverage_report", "run", "memory"),
partition=wf.get_resource("target_coverage_report", "run", "partition"),
log:
wf.get_log_file("target_coverage_report", "collect"),
**wf.get_log_file("target_coverage_report", "collect"),
wrapper:
wf.wrapper_path("target_cov_report/collect")


# Compute and link out per-base coverage report -------------------------------


rule ngs_mapping_genome_coverage_report_run:
input:
**wf.get_input_files("genome_coverage_report", "run"),
output:
**wf.get_output_files("genome_coverage_report", "run"),
threads: wf.get_resource("genome_coverage_report", "run", "threads")
resources:
time=wf.get_resource("genome_coverage_report", "run", "time"),
memory=wf.get_resource("genome_coverage_report", "run", "memory"),
partition=wf.get_resource("genome_coverage_report", "run", "partition"),
log:
wf.get_log_file("genome_coverage_report", "run"),
run:
shell(wf.get_shell_cmd("genome_coverage_report", "run", wildcards))


# Compute depth of coverage files (VCF and bigWig) ----------------------------


Expand Down
Loading

0 comments on commit 6f2edd7

Please sign in to comment.