-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add CI for a scaled down version of a cancer WES pipeline (#499)
- Loading branch information
Showing
40 changed files
with
12,808 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
name: Tests | ||
|
||
on: | ||
# always run tests when pushing to main | ||
push: | ||
branches: [ main ] | ||
|
||
# only run tests on pull requests which actually modify files that affect the tests | ||
pull_request:
  # `branches-ignore` (with a hyphen) is the branch filter key defined by
  # GitHub Actions. The list is empty, so no target branch is excluded and
  # only the path filter below decides whether the tests run.
  branches-ignore: []
  # for now, the tests defined in this file are only run when any of the following paths are modified:
  paths:
    # workflow definitions
    - '.tests/**'

    # github workflow configurations
    - '.github/**'

    # 'base' files of snappy-pipeline
    - 'snappy_pipeline/*'
    - 'snappy_pipeline/workflows/abstract/**'

    # steps used in the test workflows
    - 'snappy_pipeline/workflows/ngs_mapping/**'
    - 'snappy_pipeline/workflows/somatic_variant_calling/**'
    - 'snappy_pipeline/workflows/somatic_variant_annotation/**'
    - 'snappy_pipeline/workflows/somatic_variant_filtration/**'

    # 'base' files of snappy_wrappers
    - 'snappy_wrappers/*'

    # wrappers used in the test workflows
    - 'snappy_wrappers/wrappers/alfred/**'
    - 'snappy_wrappers/wrappers/bcftools/**'
    - 'snappy_wrappers/wrappers/bwa/**'
    - 'snappy_wrappers/wrappers/link_in_bam/**'
    - 'snappy_wrappers/wrappers/mutect/**'
    - 'snappy_wrappers/wrappers/mutect_par/**'
    - 'snappy_wrappers/wrappers/mutect2/**'
    - 'snappy_wrappers/wrappers/mutect2_par/**'
    - 'snappy_wrappers/wrappers/ngs_chew/**'
    - 'snappy_wrappers/wrappers/somatic_variant_filtration/**'
    - 'snappy_wrappers/wrappers/vep/**'
|
||
|
||
jobs: | ||
|
||
Dryrun_Tests: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/checkout@v4 | ||
with: | ||
lfs: 'true' | ||
- name: Test workflow (local FASTQs) | ||
uses: snakemake/snakemake-github-action@v1 | ||
with: | ||
directory: .tests/test-workflow | ||
snakefile: .tests/test-workflow/workflow/Snakefile | ||
args: "--configfile .tests/test-workflow/config/config.yaml --use-conda --show-failed-logs -j 2 --conda-cleanup-pkgs cache --dryrun" | ||
show-disk-usage-on-error: true | ||
|
||
|
||
Tests: | ||
runs-on: ubuntu-latest | ||
needs: | ||
- Dryrun_Tests | ||
steps: | ||
- name: update apt | ||
run: sudo apt-get update | ||
- uses: actions/checkout@v4 | ||
with: | ||
lfs: 'true' | ||
- name: Test workflow (local FASTQs) | ||
uses: snakemake/snakemake-github-action@v1 | ||
with: | ||
directory: .tests/test-workflow | ||
snakefile: .tests/test-workflow/workflow/Snakefile | ||
args: "--configfile .tests/test-workflow/config/config.yaml --use-conda --show-failed-logs -j 2 --conda-cleanup-pkgs cache" | ||
show-disk-usage-on-error: true | ||
|
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
**/.snappy_path_cache | ||
**/.*.lock | ||
**/output | ||
**/work | ||
logs | ||
snappy-pipeline | ||
pipelines/*/.snappy_pipeline/config.yaml | ||
**/__pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
static_data_config: | ||
reference: | ||
path: {{ params["reference"] }} | ||
|
||
step_config: | ||
ngs_mapping: | ||
tools: | ||
dna: [bwa] # Required if DNA analysis; otherwise, leave empty. Example: 'bwa'. | ||
bwa: | ||
path_index: {{ params["bwa_index"] }} | ||
mask_duplicates: false | ||
memory_bam_sort: 2G | ||
num_threads_bam_sort: 2 | ||
num_threads_bam_view: 2 | ||
num_threads_align: 2 | ||
ngs_chew_fingerprint: | ||
enabled: false | ||
target_coverage_report: | ||
path_target_interval_list_mapping: | ||
- name: MedExome_hg19_empirical_targets | ||
pattern: MedExome | ||
path: ../resources/Exome-MedExome.chr12.bed | ||
|
||
|
||
somatic_variant_calling: | ||
tools: [mutect2] | ||
mutect2: | ||
ngs_mapping: ../ngs_mapping | ||
extra_arguments: [] | ||
window_length: 300000000 | ||
keep_tmpdir: onerror | ||
job_mult_time: 5 | ||
|
||
|
||
somatic_variant_annotation: | ||
path_somatic_variant_calling: ../somatic_variant_calling # REQUIRED | ||
tools: ["vep"] | ||
vep: | ||
cache_dir: {{ params["vep_cache"] }} | ||
assembly: GRCh37 | ||
|
||
|
||
somatic_variant_filtration: | ||
path_somatic_variant: ../somatic_variant_annotation | ||
path_ngs_mapping: ../ngs_mapping | ||
filter_list: | ||
- dkfz: {} | ||
- bcftools: | ||
exclude: FORMAT/DP[1]<=50 | AD[1:1]<5 | AD[1:1]/(AD[1:0]+AD[1:1])<0.025 | ||
|
||
|
||
|
||
data_sets: | ||
trbc: | ||
file: samplesheet.tsv | ||
search_patterns: | ||
- {left: '*.R1.fastq.gz', right: '*.R2.fastq.gz'} | ||
search_paths: | ||
- ../raw | ||
type: matched_cancer | ||
naming_scheme: only_secondary_id |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
reference: | ||
species: "homo_sapiens" | ||
datatype: "dna" | ||
build: "GRCh37" | ||
release: 111 | ||
chromosome: "12" | ||
|
||
pipeline-configuration: | ||
cancer_wes: | ||
workdir: "pipelines/snappy-cancer_wes" | ||
config: "pipelines/snappy-cancer_wes/.snappy_pipeline/config.yaml" | ||
samplesheet: "pipelines/snappy-cancer_wes/.snappy_pipeline/samplesheet.tsv" |
1 change: 1 addition & 0 deletions
1
.tests/test-workflow/pipelines/snappy-cancer_wes/.gitattributes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
raw filter=lfs diff=lfs merge=lfs -text |
15 changes: 15 additions & 0 deletions
15
.tests/test-workflow/pipelines/snappy-cancer_wes/.snappy_pipeline/samplesheet.tsv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
[Metadata] | ||
schema cancer_matched | ||
schema_version v1 | ||
title TRCB | ||
description Public cancer dataset https://www.nature.com/articles/sdata201610 | ||
|
||
[Custom Fields] | ||
key annotatedEntity docs type minimum maximum unit choices pattern | ||
extractionType bioSample extraction type string 0 0 0 0 0 | ||
libraryKit ngsLibrary exome enrichment kit string 0 0 0 0 0 | ||
|
||
[Data] | ||
patientName sampleName isTumor extractionType libraryType folderName libraryKit | ||
case001subregion N1 N DNA WES case001subregion-N1-DNA1-WES1 MedExome | ||
case001subregion T1 Y DNA WES case001subregion-T1-DNA1-WES1 MedExome |
5 changes: 5 additions & 0 deletions
5
.tests/test-workflow/pipelines/snappy-cancer_wes/ngs_mapping/config.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
pipeline_step: | ||
name: ngs_mapping | ||
version: 1 | ||
|
||
$ref: 'file://../.snappy_pipeline/config.yaml' |
135 changes: 135 additions & 0 deletions
135
.tests/test-workflow/pipelines/snappy-cancer_wes/ngs_mapping/pipeline_job.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
#!/bin/bash | ||
|
||
# SNAPPY best practice pipeline_job.sh | ||
# | ||
# Version: 3 | ||
# Date: 2017-02-02 | ||
|
||
# The medium project/queue is a sensible default. | ||
#SBATCH --partition medium | ||
# Set a required running time for the master job. | ||
#SBATCH --time 3-00 | ||
# Reserve some resources | ||
#SBATCH --mem=6G | ||
# Keep current environment variables | ||
#SBATCH --export=all | ||
# Send a mail upon job completion and error | ||
##SBATCH --mail-type ALL | ||
##SBATCH --mail-user your.name@mdc-berlin.de | ||
# Logs should be written into "slurm_log" sub directory. | ||
#SBATCH --output slurm_log/%x-%J.log | ||
# Use more descriptive name in Slurm. | ||
#SBATCH --job-name ngs_mapping | ||
|
||
# Enable the official bash strict mode (fail early, fail often) | ||
set -euo pipefail | ||
|
||
# Fix the umask. | ||
umask ug=rwx,o= | ||
|
||
# Configuration variables --------------------------------------------------- | ||
|
||
# Maximal number of jobs to execute at the same time | ||
MAX_JOBS=500 | ||
# Maximal number of jobs per second | ||
MAX_JOBS_PER_SECOND=10 | ||
# Number of times to restart jobs | ||
RESTART_TIMES=0 | ||
|
||
# Check preconditions ------------------------------------------------------- | ||
|
||
# Ensure slurm_log is a directory | ||
# Abort early when the log directory is missing from the current directory.
test -d slurm_log || { >&2 echo "${PWD}/slurm_log does not exist"; exit 1; }

# Enforce existence of TMPDIR -----------------------------------------------

export TMPDIR=${HOME}/scratch/tmp
# Quoted so that a home directory containing whitespace yields one directory.
mkdir -p "${TMPDIR}"

# Create one log directory per Snakemake run --------------------------------

# When SLURM_JOB_ID is unset or empty (presumably when the script is started
# outside of Slurm -- confirm), use a minute-resolution timestamp instead.
test -z "${SLURM_JOB_ID-}" && SLURM_JOB_ID=$(date +%Y-%m-%d_%H-%M)
LOGDIR=slurm_log/${SLURM_JOB_ID}
mkdir -p "${LOGDIR}"
# The leading blank inside the value is kept exactly as in the original.
export SBATCH_DEFAULTS=" --output=${LOGDIR}/%x-%j.log"
|
||
# Activate appropriate Miniconda3 installation ------------------------------ | ||
|
||
# 1. If CONDA_PATH is set, use this. | ||
# 2. Look into parent directories for miniconda3 (owned by current user) | ||
# 3. Look whether there is a conda in $PATH and use it. | ||
# 4. Look for ~/miniconda3 and use it | ||
# 5. If all fails, bail out. | ||
|
||
# Print the nearest Miniconda installation owned by the current user.
#
# Starts at $PWD and walks towards "/". In every directory visited it checks
# "miniconda3.$USER" first and "miniconda3" second; the first existing
# candidate whose owner uid equals $UID is written to stdout.
#
# Returns:
#   0 if a candidate was printed, 1 if the walk ended without a match.
conda-in-parent()
{
    local current=$PWD
    local candidate

    while [[ -n "$current" ]] && [[ "$current" != "/" ]]; do
        for candidate in "$current/miniconda3.$USER" "$current/miniconda3"; do
            if [[ -e "$candidate" ]] && \
               [[ $(stat -c %u "$candidate") == "$UID" ]]; then
                echo "$candidate"
                return 0
            fi
        done
        # Must stay quoted: with whitespace in the path an unquoted expansion
        # hands dirname several operands, the result becomes multi-line and
        # never equals "/", so the loop would not terminate.
        current=$(dirname "$current")
    done

    return 1
}
|
||
# Resolve CONDA_PATH; the first rule that applies wins:
#   1. a non-empty CONDA_PATH taken from the environment,
#   2. whatever conda-in-parent prints,
#   3. the prefix two levels above the `conda` found by `which`,
#   4. $HOME/miniconda3,
#   5. $HOME/work/miniconda3.
# If none applies the script reports the problem on stderr and exits with 1.
if [[ -n "${CONDA_PATH-}" ]] || CONDA_PATH=$(conda-in-parent); then
    :
elif which conda >/dev/null; then
    # Every level is quoted so a prefix containing whitespace stays intact.
    CONDA_PATH=$(dirname "$(dirname "$(which conda)")")
elif [[ -e "$HOME/miniconda3" ]]; then
    CONDA_PATH=$HOME/miniconda3
elif [[ -e "$HOME/work/miniconda3" ]]; then
    CONDA_PATH=$HOME/work/miniconda3
else
    >&2 echo "Could not determine a suitable CONDA_PATH."
    exit 1
fi

>&2 echo "Using conda installation in $CONDA_PATH"
>&2 echo "+ conda activate "
# Strict mode is switched off around the conda calls and restored afterwards
# (presumably because conda's shell code is not strict-mode clean -- confirm).
set +euo pipefail
conda deactivate &>/dev/null || true  # disable any existing
source "$CONDA_PATH/etc/profile.d/conda.sh"
conda activate  # enable found
set -euo pipefail
|
||
# Activate bash cmd printing, debug info ------------------------------------ | ||
|
||
set -x | ||
>&2 hostname | ||
>&2 date | ||
|
||
# Kick off Snakemake -------------------------------------------------------- | ||
|
||
# Interpret array jobs. | ||
# Allow selection of batch | ||
# Optional batch selection: a non-empty SNAPPY_BATCH becomes
# "--batch <rule>=<batch>", where <rule> is SNAKEMAKE_BATCH_RULE or, if that
# variable is unset, the literal "default".
if [[ -n "${SNAPPY_BATCH-}" ]]; then
    SNAKEMAKE_BATCH_ARG="--batch ${SNAKEMAKE_BATCH_RULE-default}=${SNAPPY_BATCH}"
else
    SNAKEMAKE_BATCH_ARG=
fi

# Using the medium project/queue is a sensible default.
#
# ${SNAKEMAKE_BATCH_ARG} is intentionally left unquoted: it has to split into
# the option and its value, and has to vanish completely when it is empty.
# "$@" forwards the script's own arguments one by one, so an argument that
# contains whitespace is not torn apart.
snappy-snake --printshellcmds \
    ${SNAKEMAKE_BATCH_ARG} \
    --snappy-pipeline-use-profile "cubi-v1" \
    --snappy-pipeline-jobs "$MAX_JOBS" \
    --restart-times "${RESTART_TIMES}" \
    --default-partition="medium" \
    --rerun-incomplete \
    -- \
    "$@"
|
||
# Print date after finishing, for good measure ------------------------------ | ||
|
||
>&2 date | ||
>&2 echo "All done. Have a nice day." |
3 changes: 3 additions & 0 deletions
3
...py-cancer_wes/raw/case001subregion-N1-DNA1-WES1/case001subregion-N1-DNA1-WES1.R1.fastq.gz
Git LFS file not shown
3 changes: 3 additions & 0 deletions
3
...py-cancer_wes/raw/case001subregion-N1-DNA1-WES1/case001subregion-N1-DNA1-WES1.R2.fastq.gz
Git LFS file not shown
3 changes: 3 additions & 0 deletions
3
...py-cancer_wes/raw/case001subregion-T1-DNA1-WES1/case001subregion-T1-DNA1-WES1.R1.fastq.gz
Git LFS file not shown
3 changes: 3 additions & 0 deletions
3
...py-cancer_wes/raw/case001subregion-T1-DNA1-WES1/case001subregion-T1-DNA1-WES1.R2.fastq.gz
Git LFS file not shown
Oops, something went wrong.