Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: adjustment of manta wrapper for sv_calling_wgs #351

Merged
merged 1 commit into from
Jan 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions snappy_wrappers/wrappers/manta/germline_targeted/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@
cp -ra $workdir/results $outdir
rm -rf $workdir

sleep 1min # for good measure

pushd $outdir
tar czf results.tar.gz results
ln -sr results/variants/diploidSV.vcf.gz $(basename {snakemake.output.vcf})
Expand Down
84 changes: 72 additions & 12 deletions snappy_wrappers/wrappers/manta/germline_wgs/wrapper.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,65 @@
# -*- coding: utf-8 -*-
"""Wrapper for running Manta in germline variant calling mode on WGS data
"""
from snakemake import shell

from snakemake.shell import shell
__author__ = "Manuel Holtgrewe <manuel.holtgrewe@bih-charite.de>"

__author__ = "Manuel Holtgrewe"
__email__ = "manuel.holtgrewe@bih-charite.de"
DEF_HELPER_FUNCS = r"""
# compute-md5 INPUT OUTPUT
#
# Run md5sum on INPUT and write the resulting checksum line to OUTPUT, with
# everything up to the last "/" removed from the file name field.
#
# Arguments:
#   $1 - path of the file to checksum
#   $2 - path of the checksum file to write
# Exits the calling shell with status 1 (message on stderr) unless exactly two
# arguments are given.
#
# NOTE: awk only rewrites the second whitespace-separated field, so an INPUT
# name that itself contains whitespace is still not represented faithfully in
# the checksum line.
compute-md5()
{
    if [[ $# -ne 2 ]]; then
        >&2 echo "Invalid number of arguments: $#"
        exit 1
    fi
    # Both paths are quoted: an unquoted redirect target that expands to more
    # than one word makes bash fail with "ambiguous redirect", and an unquoted
    # input path would be split into several md5sum arguments.
    md5sum "$1" \
    | awk '{ gsub(/.*\//, "", $2); print; }' \
    > "$2"
}
"""

shell(
r"""
# -----------------------------------------------------------------------------
# Redirect stderr to log file by default and enable printing executed commands
exec 2> >(tee -a "{snakemake.log}")
set -x
# -----------------------------------------------------------------------------

# Write files for reproducibility -----------------------------------------------------------------

{DEF_HELPER_FUNCS}

# Write out information about conda and save a copy of the wrapper with picked variables
# as well as the environment.yaml file.
conda list >{snakemake.log.conda_list}
conda info >{snakemake.log.conda_info}
compute-md5 {snakemake.log.conda_list} {snakemake.log.conda_list_md5}
compute-md5 {snakemake.log.conda_info} {snakemake.log.conda_info_md5}
cp {__real_file__} {snakemake.log.wrapper}
compute-md5 {snakemake.log.wrapper} {snakemake.log.wrapper_md5}
cp $(dirname {__file__})/environment.yaml {snakemake.log.env_yaml}
compute-md5 {snakemake.log.env_yaml} {snakemake.log.env_yaml_md5}

# Also pipe stderr to log file --------------------------------------------------------------------

# Only touch the log file when a log path was configured.
if [[ -n "{snakemake.log.log}" ]]; then
# NOTE(review): tty prints "not a tty" on stdout when stdin is not a terminal, so this
# comparison looks true in both cases and the else branch may never run -- confirm the
# intended behaviour before changing it.
if [[ "$(set +e; tty; set -e)" != "" ]]; then
# Start from a fresh log file and make sure its directory exists.
rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log})
# From here on, copy everything written to stderr into the log file while still
# forwarding it to the previous stderr.
exec 2> >(tee -a "{snakemake.log.log}" >&2)
else
# Fallback: leave stderr alone and only write a placeholder line into the log file.
rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log})
echo "No tty, logging disabled" >"{snakemake.log.log}"
fi
fi

# Create auto-cleaned temporary directory
export TMPDIR=$(mktemp -d)
# Covers an early exit that happens before the work directory is known.
trap "rm -rf \"$TMPDIR\"" EXIT

# Run actual tools --------------------------------------------------------------------------------

basedir=$(dirname $(dirname {snakemake.output.vcf}))
workdir=$basedir/work
outdir=$basedir/out

# Ensure the working directory is removed, configManta.py will bail out if it already exists.
# A shell keeps only one handler per signal, so this trap replaces the one set above; it
# therefore has to remove the temporary directory as well as the work directory, otherwise
# the directory created by mktemp is left behind.
trap "rm -rf \"$TMPDIR\" \"$workdir\"" EXIT
# Clear out $outdir, there may be some old files remaining that are not governed by Snakemake
rm -rf $outdir/*

configManta.py \
--referenceFasta {snakemake.config[static_data_config][reference][path]} \
Expand All @@ -35,6 +74,8 @@
cp -ra $workdir/results $outdir
rm -rf $workdir

sleep 1min # for good measure

pushd $outdir
tar czf results.tar.gz results
ln -sr results/variants/diploidSV.vcf.gz $(basename {snakemake.output.vcf})
Expand All @@ -43,9 +84,28 @@
$(basename {snakemake.output.vcf} .vcf.gz).candidates.vcf.gz
ln -sr results/variants/candidateSV.vcf.gz.tbi \
$(basename {snakemake.output.vcf} .vcf.gz).candidates.vcf.gz.tbi
popd

# Compute MD5 sums on output files
compute-md5 {snakemake.output.vcf} {snakemake.output.vcf_md5}
compute-md5 {snakemake.output.vcf_tbi} {snakemake.output.vcf_tbi_md5}

# Create output links -----------------------------------------------------------------------------

for f in results.tar.gz *.vcf.gz *.tbi; do
md5sum $f >$f.md5
for path in {snakemake.output.output_links}; do
dst=$path
src=work/${{dst#output/}}
ln -sr $src $dst
done
"""
)

# Compute MD5 sums of logs.
#
# This is a second, separate shell() call, so the helper definitions are injected again
# through DEF_HELPER_FUNCS before compute-md5 is used. The script sleeps for one second
# first (see the inline comment: an attempt to let the log file be flushed) and then
# writes the checksum of snakemake.log.log to snakemake.log.log_md5.
shell(
r"""
{DEF_HELPER_FUNCS}

sleep 1s # try to wait for log file flush
compute-md5 {snakemake.log.log} {snakemake.log.log_md5}
"""
)