# pipeline.config


[configFilePipeline]

# ================================ #
#          SBATCH config           #
# -------------------------------- #
# ================================ #

# ----------------
# sbatch partition
# ----------------
# (Required)
# Partition to which jobs are submitted
# Used by: 
#	- variantDiscoveryFJD_panelWES.py

sbatch_partition="bioinfo"


# --------------
# sbatch account
# --------------
# (Required)
# Account to which jobs are submitted
# Used by: 
#	- variantDiscoveryFJD_panelWES.py

sbatch_account="bioinfo_serv"


# ================================ #
#              Files               #
# -------------------------------- #
# ================================ #

# -------------------------
# Reference genome (.fasta)
# -------------------------
# (Required)
# Reference genome to use for the alignment, SNV calling, CNV calling and annotation. 
# Used by: 
#	- variantDiscoveryFJD_panelWES.py

refGenome_path="/home/proyectos/bioinfo/references/hg19/ucsc.hg19.fasta"


# --------------------
# HapMap CNN (.vcf.gz)
# --------------------
# (Required)
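# Validated VCF with known sites of common variation. We use a VCF from the HapMap project. Available at: https://storage.googleapis.com/genomics-public-data/resources/broad/hg38/v0/hapmap_3.3.hg38.vcf.gz
# NOTE(review): the link above is the hg38 build, while the path configured below points to a b37 file; confirm that the build of this VCF matches the reference genome in use.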
# Used by: 
#	- tasks/SNVfiltering.sh

cnn_hapmap_path="/home/proyectos/bioinfo/references/CNNgatk/b37_hapmap_3.3.b37.mainchr.vcf.gz"


# -------------------
# 1000G CNN (.vcf.gz)
# -------------------
# (Required)
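# Validated VCF with known sites of common variation. We use a VCF from the 1000 Genomes Project. Available at: https://storage.googleapis.com/genomics-public-data/resources/broad/hg38/v0/1000G_omni2.5.hg38.vcf.gz
# NOTE(review): the link above is the hg38 build, while the path configured below points to a b37 file; confirm that the build of this VCF matches the reference genome in use.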
# Used by: 
#	- tasks/SNVfiltering.sh

cnn_1000G_path="/home/proyectos/bioinfo/references/CNNgatk/b37_1000G_omni2.5.b37.mainchr.vcf.gz"


#--------------------------------------------
# Index of the reference genome (.fasta.fai)
#--------------------------------------------
# (Optional / Required if CoNVaDING CNV calling is selected)
# Index of the reference genome to be used by CoNVaDING (CNV calling algorithm). This file should be a two-column file with the names of the chromosomes (without the "chr" prefix) and their sizes (see the CoNVaDING documentation for more information).
# Used by: 
#	- pipelines/pipeline9_CNVcalling.sh

fai_path="/home/proyectos/bioinfo/references/hg19/ucsc.hg19_convading.fasta.fai"


# ================================ #
#            Directories           #
# -------------------------------- #
# ================================ #

#------------------
# Scratch directory
#------------------
# (Optional / Required if FASTQ files need concatenation or are retrieved from BaseSpace)
# Temporary directory used for merging FASTQ files. If left blank, the default directory will be "/scratch/${userName}/${runName}/"
# Used by: 
#	- variantDiscoveryFJD_panelWES.py

scratch_dir=""


#-------------------
# DB directory (VCF)
#-------------------
# (Optional / Required if -F/--mafincorporation and -A/--annotation are selected as arguments) 
# Directory to which a copy of the VCF file with the SNVs and INDELs is moved for database creation.
# Used by: 
#	- pipelines/pipeline4_LohAnnotationOutput.sh

db_vcf_path="/home/proyectos/bioinfo/fjd/MAF_FJD_v3.0/individual_vcf/new_vcf/"


#-------------------
# DB directory (BED)
#-------------------
# (Optional / Required if -F/--mafincorporation and -A/--annotation are selected as arguments) 
# Directory to which a copy of the BED file with the covered regions is moved for database creation.
# Used by: 
#	- pipelines/pipeline4_LohAnnotationOutput.sh

db_coverage_path='/home/proyectos/bioinfo/fjd/MAF_FJD_v3.0/coverage/new_bed/'


# ================================ #
#      Binaries & Executables      #
# -------------------------------- #
# ================================ #

# ------------------------------------------------
# The BaseSpace Sequence Hub CLI tool suite binary
# ------------------------------------------------
# (Optional / Required if FASTQ files are retrieved from BaseSpace: -b/--basespace argument)
# Used by: 
#	- variantDiscoveryFJD_panelWES.py

baseSpace_bin="/home/proyectos/bioinfo/software/bs"


# ---------------------
# BaseSpace-copy binary
# ---------------------
# (Optional / Required if FASTQ files are retrieved from BaseSpace: -b/--basespace argument)
# Used by: 
#	- tasks/fastqDownloadCat.py

baseSpacecp_bin="/home/proyectos/bioinfo/software/bs-cp"


# ---------------
# GATK executable
# ---------------
# (Required)
# Used by: 
#	- tasks/mapping.sh
#	- tasks/BAMpreprocessing.sh
#	- tasks/SNVcalling.sh
#	- tasks/SNVfiltering.sh
#	- tasks/combinedGVCF.sh
#	- tasks/genotyping.sh
#	- tasks/genotypeRefinement.sh

gatkPath_path="/usr/local/bioinfo/gatk/4.2.0/gatk-package-4.2.0.0-local.jar"


# -----------------
# PICARD executable
# -----------------
# (Required)
# Used by: 
#	- tasks/mapping.sh ??
#	- tasks/BAMpreprocessing.sh ??
# 	- tasks/SNVcalling.sh ??
#	- tasks/combinedGVCF.sh ??
#	- tasks/genotyping.sh ??

picard_path="/usr/local/bioinfo/picard-tools/2.18.9/picard.jar"


# ------------
# conda binary
# ------------
# (Required)
# Used by: 
#	- tasks/SNVfiltering.sh
#	- tasks/genotypeRefinement.sh

conda_bin="/usr/local/miniconda/python-3.6/bin/conda"


# ------------
# PLINK binary
# ------------
# (Optional / Required if -A/--annotation is selected as argument) 
# Used by: 
#	- tasks/LOH.sh

plink_bin="/usr/local/bioinfo/plink/plink"


# ---------------------------
# Path of AnnotSV installation 
# ---------------------------
# (Optional / Required if CNV calling analysis is selected -a/--analysis cnv/all)
# Used by: 
#	- pipelines/pipeline9_CNVcalling.sh

annotsv_path="/usr/local/bioinfo/annotsv/2.2"


# ================================ #
# ENSEMBL VARIANT EFFECT PREDICTOR #
# -------------------------------- #
# ================================ #

# Because the ENSEMBL Variant Effect Predictor (VEP) setup is highly customized, its absolute paths are kept in the scripts "tasks/VEPannotation.sh" and "tasks/PVM_Cluster.py". To learn more about VEP installation and usage, please visit: https://www.ensembl.org/info/docs/tools/vep/script/index.html
# Our pipeline also annotates using several plugins and custom databases; please feel free to use our code as an example to annotate yours.