# config.yaml

# Snakemake configuration file
#
# The variables in this file control the behavior of the Snakefile at the root
# of the project. The Snakefile starts with raw sequencing reads in FASTQ files
# and converts them to gene counts.

# Paths ------------------------------------------------------------------------

# The following paths must be updated to run the scripts on your machine.

# All paths must end with a forward slash (/).

# Local directory where the final data files are stored. The path may be
# absolute or relative to the Snakefile and must end with a forward slash.
dir_data: "data/"

# External directory for intermediate files, given as an absolute path ending
# with a forward slash. The raw gzipped FASTQ files are expected in its fastq/
# subdirectory.
dir_external: "/project2/gilad/singlecell-qtl/"

# Directory for log files (absolute or relative to the Snakefile), ending with
# a forward slash. When changing it, also update the log file paths in
# cluster.json.
dir_log: "log/"

# Samples ---------------------------------------------------------------------

# Select which samples to run. You should select a subset when first configuring
# and testing the pipeline.

# C1 chips
# One chip per line, so single chips can be commented out to run a subset.
# Keep the quotes: the IDs must be read as strings, not numbers.
batch1:
  - "03162017"
  - "03172017"
  - "03232017"
  - "03302017"
  - "03312017"
  - "04052017"
  - "04072017"
  - "04132017"
  - "04142017"
  - "04202017"
# One chip per line; keep the quotes so the IDs are read as strings.
batch2:
  - "08102017"
  - "08112017"
  - "08142017"
  - "08152017"
  - "08162017"
  - "08182017"
  - "08212017"
  - "08222017"
  - "08232017"
  - "08242017"
  - "08282017"
  - "08292017"
  - "08302017"
  - "08312017"
# One chip per line; keep the quotes so the IDs are read as strings.
batch3:
  - "09252017"
  - "09262017"
  - "09272017"
  - "10022017"
  - "10042017"
  - "10052017"
  - "10062017"
  - "10092017"
  - "10102017"
  - "10112017"
  - "10122017"
  - "10132017"
  - "10162017"
  - "10172017"
# One chip per line; keep the quotes so the IDs are read as strings.
batch4:
  - "10302017"
  - "11022017"
  - "11032017"
  - "11062017"
  - "11072017"
  - "11082017"
  - "11092017"
  - "11102017"
  - "11132017"
  - "11142017"
  - "11152017"
  - "11162017"
  - "11172017"
  - "11202017"
  - "11212017"
# One chip per line; keep the quotes so the IDs are read as strings.
batch5:
  - "11272017"
  - "11282017"
  - "11292017"
  - "11302017"
  - "12012017"
  - "12042017"
  - "12052017"
  - "12062017"
  - "12072017"
  - "12082017"
  - "12112017"
  - "12122017"
  - "12132017"
  - "12142017"
# New chips are listed first, followed by chips repeated from earlier batches.
# One chip per line; keep the quotes so the IDs are read as strings.
batch6:
  - "02192018"
  - "02202018"
  - "02212018"
  - "02222018"
  - "02242018"
  - "02262018"
  - "02272018"
  - "02282018"
  - "03012018"
  - "03052018"
  - "03062018"
  - "03072018"
  # These are repeats from previous batches:
  # batch 2
  - "08102017"
  - "08162017"
  # batch 3
  - "09272017"
  - "10092017"
  - "10102017"
  - "10112017"
  # batch 5
  - "11282017"
  - "12042017"

# Rows of a chip (A through H)
rows:
  - "A"
  - "B"
  - "C"
  - "D"
  - "E"
  - "F"
  - "G"
  - "H"

# Columns of a chip (01 through 12). The labels are zero-padded, so they must
# stay quoted to be read as strings.
cols:
  - "01"
  - "02"
  - "03"
  - "04"
  - "05"
  - "06"
  - "07"
  - "08"
  - "09"
  - "10"
  - "11"
  - "12"

# Annotation -------------------------------------------------------------------

# The following variables control which genome release is used for annotation
# and which chromosomes are considered.

# Ensembl release that provides the genome sequence and annotation.
# Archive site: http://feb2014.archive.ensembl.org/index.html
ensembl_archive: "feb2014.archive.ensembl.org"
# Release number (an integer, deliberately left unquoted)
ensembl_rel: 75
# Genome identifiers, one per suffix (ce, dm, hs). Each ends in the release
# number above. NOTE(review): presumably these must be updated together with
# ensembl_rel -- confirm against the Snakefile.
ensembl_genome_ce: "WBcel235.75"
ensembl_genome_dm: "BDGP5.75"
ensembl_genome_hs: "GRCh37.75"

# Chromosomes considered for the "ce" genome
chr_ce:
  - "I"
  - "II"
  - "III"
  - "IV"
  - "V"
  - "X"
  - "MtDNA"
# Chromosomes considered for the "dm" genome
chr_dm:
  - "2L"
  - "2R"
  - "3L"
  - "3R"
  - "4"
  - "dmel_mitochondrion_genome"
  - "X"
  - "YHet"
# Chromosomes considered for the "hs" genome. Keep every name quoted: bare
# numbers would load as integers, and a bare Y is a boolean under YAML 1.1.
chr_hs:
  - "1"
  - "2"
  - "3"
  - "4"
  - "5"
  - "6"
  - "7"
  - "8"
  - "9"
  - "10"
  - "11"
  - "12"
  - "13"
  - "14"
  - "15"
  - "16"
  - "17"
  - "18"
  - "19"
  - "20"
  - "21"
  - "22"
  - "X"
  - "Y"
  - "MT"