Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe committed Dec 27, 2022
1 parent 47e9a8c commit 8643d70
Show file tree
Hide file tree
Showing 25 changed files with 366 additions and 510 deletions.
2 changes: 1 addition & 1 deletion snappy_pipeline/apps/snappy_snake.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ def main(argv=None):
dest="conda_create_envs_only",
action="store_true",
default=False,
help="Prepare all conda environments"
help="Prepare all conda environments",
)

args = parser.parse_args(argv)
Expand Down
62 changes: 28 additions & 34 deletions snappy_pipeline/workflows/variant_calling/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -73,23 +73,39 @@ rule variant_calling_bcftools_run:
# Run GATK HaplotypeCaller (direct, pedigree/multi-sample) --------------------


rule variant_calling_gatk_hc_run:
rule variant_calling_gatk3_hc_run:
input:
unpack(wf.get_input_files("gatk_hc", "run")),
unpack(wf.get_input_files("gatk3_hc", "run")),
output:
**wf.get_output_files("gatk_hc", "run"),
threads: wf.get_resource("gatk_hc", "run", "threads")
**wf.get_output_files("gatk3_hc", "run"),
threads: wf.get_resource("gatk3_hc", "run", "threads")
resources:
time=wf.get_resource("gatk_hc", "run", "time"),
memory=wf.get_resource("gatk_hc", "run", "memory"),
partition=wf.get_resource("gatk_hc", "run", "partition"),
params:
step_key="variant_calling",
caller_key="gatk_hc",
time=wf.get_resource("gatk3_hc", "run", "time"),
memory=wf.get_resource("gatk3_hc", "run", "memory"),
partition=wf.get_resource("gatk3_hc", "run", "partition"),
log:
**wf.get_log_file("gatk3_hc", "run"),
wrapper:
wf.wrapper_path("gatk3_hc")


# Run GATK UnifiedGenotyper ---------------------------------------------------


rule variant_calling_gatk3_ug_run:
input:
unpack(wf.get_input_files("gatk3_ug", "run")),
output:
**wf.get_output_files("gatk3_ug", "run"),
threads: wf.get_resource("gatk3_ug", "run", "threads")
resources:
time=wf.get_resource("gatk3_ug", "run", "time"),
memory=wf.get_resource("gatk3_ug", "run", "memory"),
partition=wf.get_resource("gatk3_ug", "run", "partition"),
log:
**wf.get_log_file("gatk_hc", "run"),
**wf.get_log_file("gatk3_ug", "run"),
wrapper:
wf.wrapper_path("gatk_hc_par")
wf.wrapper_path("gatk3_ug")


# Run GATK 4 HaplotypeCaller Joint (direct, pedigree/multi-sample) ------------
Expand Down Expand Up @@ -174,28 +190,6 @@ rule variant_calling_gatk4_hc_gvcf_genotype:
wf.wrapper_path("gatk4_hc/genotype")


# Run GATK UnifiedGenotyper ---------------------------------------------------


rule variant_calling_gatk_ug_run:
input:
unpack(wf.get_input_files("gatk_ug", "run")),
output:
**wf.get_output_files("gatk_ug", "run"),
threads: wf.get_resource("gatk_ug", "run", "threads")
resources:
time=wf.get_resource("gatk_ug", "run", "time"),
memory=wf.get_resource("gatk_ug", "run", "memory"),
partition=wf.get_resource("gatk_ug", "run", "partition"),
params:
step_key="variant_calling",
caller_key="gatk_ug",
log:
**wf.get_log_file("gatk_ug", "run"),
wrapper:
wf.wrapper_path("gatk_ug_par")


# QC / Statistics ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Generate BCFtools stats report ----------------------------------------------
Expand Down
96 changes: 18 additions & 78 deletions snappy_pipeline/workflows/variant_calling/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,6 @@
from snappy_pipeline.workflows.abstract import (
BaseStep,
BaseStepPart,
LinkOutStepPart,
ResourceUsage,
WritePedigreeStepPart,
)
Expand Down Expand Up @@ -204,78 +203,20 @@
gatk4_hc_joint:
window_length: 10000000
num_threads: 16
allow_seq_dict_incompatibility: false # REQUIRED
annotations: [] # REQUIRED
allow_seq_dict_incompatibility: false
gatk4_hc_gvcf:
window_length: 10000000
num_threads: 16
allow_seq_dict_incompatibility: false # REQUIRED
annotations: [] # REQUIRED
gatk_hc:
# Parallelization configuration
num_threads: 2 # number of cores to use locally
window_length: 5000000 # split input into windows of this size, each triggers a job
num_jobs: 500 # number of windows to process in parallel
use_profile: true # use Snakemake profile for parallel processing
restart_times: 0 # number of times to re-launch jobs in case of failure
max_jobs_per_second: 10 # throttling of job creation
max_status_checks_per_second: 10 # throttling of status jobs
debug_trunc_tokens: 0 # truncation to first N tokens (0 for none)
keep_tmpdir: never # keep temporary directory, {always, never, onerror}
job_mult_memory: 1 # memory multiplier
job_mult_time: 1 # running time multiplier
merge_mult_memory: 1 # memory multiplier for merging
merge_mult_time: 1 # running time multiplier for merging
# GATK HC--specific configuration
allow_seq_dict_incompatibility: false
annotations:
- BaseQualityRankSumTest
- FisherStrand
- GCContent
- HaplotypeScore
- HomopolymerRun
- MappingQualityRankSumTest
- MappingQualityZero
- QualByDepth
- ReadPosRankSumTest
- RMSMappingQuality
- DepthPerAlleleBySample
- Coverage
- ClippingRankSumTest
- DepthPerSampleHC
gatk_ug:
# Parallelization configuration
num_threads: 2 # number of cores to use locally
window_length: 5000000 # split input into windows of this size, each triggers a job
num_jobs: 500 # number of windows to process in parallel
use_profile: true # use Snakemake profile for parallel processing
restart_times: 0 # number of times to re-launch jobs in case of failure
max_jobs_per_second: 10 # throttling of job creation
max_status_checks_per_second: 10 # throttling of status jobs
debug_trunc_tokens: 0 # truncation to first N tokens (0 for none)
keep_tmpdir: never # keep temporary directory, {always, never, onerror}
job_mult_memory: 1 # memory multiplier
job_mult_time: 1 # running time multiplier
merge_mult_memory: 1 # memory multiplier for merging
merge_mult_time: 1 # running time multiplier for merging
# GATK UG--specific configuration
gatk3_hc:
num_threads: 16
window_length: 10000000
allow_seq_dict_incompatibility: false
gatk3_ug:
num_threads: 16
window_length: 10000000
allow_seq_dict_incompatibility: false
downsample_to_coverage: 250
annotations:
- BaseQualityRankSumTest
- FisherStrand
- GCContent
- HaplotypeScore
- HomopolymerRun
- MappingQualityRankSumTest
- MappingQualityZero
- QualByDepth
- ReadPosRankSumTest
- RMSMappingQuality
- DepthPerAlleleBySample
- Coverage
- ClippingRankSumTest
- DepthPerSampleHC
"""


Expand Down Expand Up @@ -467,8 +408,8 @@ def get_resource_usage(self, action):
)


class GatkCallerStepPartBase(VariantCallingStepPart):
"""Germline variant calling with GATK caller"""
class Gatk3CallerStepPartBase(VariantCallingStepPart):
"""Germline variant calling with GATK v3 caller"""

def check_config(self):
if self.__class__.name not in self.config["tools"]:
Expand All @@ -495,24 +436,24 @@ def get_resource_usage(self, action):
)


class GatkHaplotypeCallerStepPart(GatkCallerStepPartBase):
class Gatk3HaplotypeCallerStepPart(Gatk3CallerStepPartBase):
"""Germline variant calling with GATK HaplotypeCaller
This triggers the cluster-parallel variant calling with gatk_ug, GATK3.
This triggers the cluster-parallel variant calling with gatk3_hc, GATK3.
"""

#: Step name
name = "gatk_hc"
name = "gatk3_hc"


class GatkUnifiedGenotyperStepPart(GatkCallerStepPartBase):
class Gatk3UnifiedGenotyperStepPart(Gatk3CallerStepPartBase):
"""Germline variant calling with GATK UnifiedGenotyper
This triggers the cluster-parallel variant calling with gatk_ug, GATK3.
This triggers the cluster-parallel variant calling with gatk3_ug, GATK3.
"""

#: Step name
name = "gatk_ug"
name = "gatk3_ug"


class Gatk4CallerStepPartBase(VariantCallingStepPart):
Expand Down Expand Up @@ -900,14 +841,13 @@ def __init__(self, workflow, config, config_lookup_paths, config_paths, workdir)
(
WritePedigreeStepPart,
BcftoolsStepPart,
GatkHaplotypeCallerStepPart,
GatkUnifiedGenotyperStepPart,
Gatk3HaplotypeCallerStepPart,
Gatk3UnifiedGenotyperStepPart,
Gatk4HaplotypeCallerJointStepPart,
Gatk4HaplotypeCallerGvcfStepPart,
BcftoolsStatsStepPart,
JannovarStatisticsStepPart,
BafFileGenerationStepPart,
LinkOutStepPart,
)
)
# Register sub workflows
Expand Down
1 change: 1 addition & 0 deletions snappy_wrappers/wrappers/gatk3_hc/environment.yaml
Loading

0 comments on commit 8643d70

Please sign in to comment.