# This is the parameter file for the pathogen discovery pipeline.
# Every command (or "module", if you like) has its own settings. The settings for a particular module must follow its "command module" line. Otherwise, the order doesn't matter.
# How to use this parameter file:
# for boolean options, use "yes" or "1" for assent OR "no" or "0" or "-" for dissent (or simply comment out or omit the line).
# the default settings are generally "no" unless otherwise specified
# note: make sure nt and nr are up-to-date!
# -------------------
command step1
seq_platform illumina # choices are: illumina or 454
# explanation: must be run first, hence the name step1. this module maps the fastq IDs to simple numerical IDs and, if the platform is 454, processes the .sff file
# -------------------
command quality_filter
cutadapt_options_R1 -a GATCGGAAGAGCACACGTCTGAACTCCAGTCAC -g GCCGGAGCTCTGCAGATATC -a GATATCTGCAGAGCTCCGGC -m 50 --match-read-wildcards
cutadapt_options_R2 -a AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT -g GCCGGAGCTCTGCAGATATC -a GATATCTGCAGAGCTCCGGC -m 50 --match-read-wildcards
# cutadapt_options -g GCCGGAGCTCTGCAGATATC -m 20 --match-read-wildcards # this should have every flag except the input and output ones
# cutadapt_options2 -g CGCCGTTTCCCAGTAGGTCTC -m 20 --match-read-wildcards # this should have every flag except the input and output ones
# prinseq_options -log -verbose -min_len 50 -ns_max_p 10 -derep 12345 # this should have every flag except the input and output ones (i.e., don't specify the "-out_good" "-out_bad" or "fastq" flags)
prinseq_options -min_len 50 -derep 14 -lc_method dust -lc_threshold 3 -trim_ns_left 1 -trim_ns_right 1 -trim_qual_right 15 # this should have every flag except the input and output ones (i.e., don't specify the "-out_good" "-out_bad" or "fastq" flags)
# explanation: run up to 2 iterations of cutadapt, if specified, and prinseq, if specified
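# for reference, a sketch of how these settings plug into the underlying tools. file names here are
# illustrative placeholders, and the input/output flags shown are the ones the pipeline supplies itself:
# cutadapt -a GATCGGAAGAGCACACGTCTGAACTCCAGTCAC -g GCCGGAGCTCTGCAGATATC -a GATATCTGCAGAGCTCCGGC -m 50 --match-read-wildcards -o R1.trimmed.fastq R1.fastq
# prinseq-lite.pl -fastq R1.trimmed.fastq -out_good R1.clean -out_bad null -min_len 50 -derep 14 -lc_method dust -lc_threshold 3 -trim_ns_left 1 -trim_ns_right 1 -trim_qual_right 15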
# -------------------
command host_map
# for the "_list" settings, use a comma-delimited list with no spaces
mapper_program_list bowtie2,bowtie2 # choices are: bwa, bowtie2
mapper_db_list /media/VD_Research/databases/humandna/hg38,/media/VD_Research/databases/humanrna/hg38_mrna
mapper_name_list bowtie2_genome_local,bowtie2_transcript_local # names that will appear on a graph
mapper_options_list --local -p 10,--local -p 10 # flags for the aligner (-p specifies how many threads to use)
# explanation: perform a chain of alignments.
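# for reference, the first list entry above would translate into roughly the following bowtie2 call
# (read and output file names are illustrative placeholders):
# bowtie2 --local -p 10 -x /media/VD_Research/databases/humandna/hg38 -U filtered_reads.fastq -S host_genome.sam
# the reads left unmapped after each step are presumably what gets passed to the next mapper in the chain.
# -------------------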
command iterative_blast_phylo
# for the "_list" settings, use a comma-delimited list with no spaces
blast_db_list /media/VD_Research/databases/ncbi/blast/nt/nt,/media/VD_Research/databases/ncbi/blast/nt/nt # blast db prefix (a third entry, e.g. /media/VD_Research/databases/diamond/diamondnr, would add a diamond step)
blast_task_list megablast,dc-megablast # options are: megablast, dc-megablast, blastn, diamond
# blast_options_list -evalue 1e-4 -word_size 28,-evalue 1e-4 -word_size 12,-evalue 1e-4 -word_size 7 # blast options (except for: -task -query -db -out -outfmt -num_descriptions; these are hardwired)
blast_options_list -evalue 1e-4 -word_size 28,-evalue 1e-4 -word_size 12 # blast options (except for: -task -query -db -out -outfmt -num_descriptions; these are hardwired). the matching diamond entry would be: --compress 0 -p 0 -v -k 10 -w 28 --id 0.7 -c 4 -t ~/tmp
ninst_list 10,10 # the input file is broken into chunks and blasted in parallel; this sets how many BLAST instances run at once
taxonomy_names /media/VD_Research/databases/ncbi/taxonomy/names.dmp # NCBI taxonomy names dump file
taxonomy_nodes /media/VD_Research/databases/ncbi/taxonomy/nodes.dmp # NCBI taxonomy nodes dump file
blast_pro_db /media/VD_Research/databases/ncbi/blast/nr/nr # BLAST protein database used for annotation
# explanation: perform a chain of blasts. each input is split into chunks that are blasted in parallel to speed up the process. note: if you use diamond, keep the chunks small; otherwise it takes a long time.
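# for reference, each chunk of the first iteration would be blasted with roughly the following command
# (chunk and output names are illustrative placeholders; -task, -query, -db, -out and the other hardwired flags are filled in by the pipeline):
# blastn -task megablast -query chunk_01.fasta -db /media/VD_Research/databases/ncbi/blast/nt/nt -evalue 1e-4 -word_size 28 -out chunk_01.blast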
# -------------------
command iterative_blast_phylo_2
# this is the same as iterative_blast_phylo; it just lets you run the module an n-th time
# for the "_list" settings, use a comma-delimited list with no spaces
blast_db_list /media/VD_Research/databases/ncbi/blast/nt/nt,/media/VD_Research/databases/ncbi/blast/nt/nt # blast db prefix
blast_task_list megablast,dc-megablast # options are: megablast, dc-megablast, blastn, diamond
# blast_options_list -evalue 1e-4 -word_size 28,-evalue 1e-4 -word_size 12,-evalue 1e-4 -word_size 7 # blast options (except for: -task -query -db -out -outfmt -num_descriptions; these are hardwired)
blast_options_list -evalue 1e-4 -word_size 28,-evalue 1e-4 -word_size 12 # blast options (except for: -task -query -db -out -outfmt -num_descriptions; these are hardwired)
ninst_list 10,10 # the input file is broken into chunks and blasted in parallel; this sets how many BLAST instances run at once
taxonomy_names /media/VD_Research/databases/ncbi/taxonomy/names.dmp # NCBI taxonomy names dump file
taxonomy_nodes /media/VD_Research/databases/ncbi/taxonomy/nodes.dmp # NCBI taxonomy nodes dump file
# explanation: run another instance of the iterative_blast_phylo module. you can run more instances by adding entries in this parameter file for "iterative_blast_phylo_3", "iterative_blast_phylo_4", etc.
# -------------------
command iterative_blast_phylo_3
# run diamond on the getorf output
blast_db_list /media/VD_Research/databases/diamond/diamondnr # blast db prefix
blast_task_list diamond # options are: megablast, dc-megablast, blastn, diamond
# blast_options_list -evalue 1e-4 -word_size 28,-evalue 1e-4 -word_size 12,-evalue 1e-4 -word_size 7 # blast options (except for: -task -query -db -out -outfmt -num_descriptions; these are hardwired)
blast_options_list --compress 0 -p 0 -v -k 10 -w 28 --id 0.7 -c 4 -t ~/tmp # diamond options
ninst_list 10 # number of instances to run in parallel
taxonomy_names /media/VD_Research/databases/ncbi/taxonomy/names.dmp # NCBI taxonomy names dump file
taxonomy_nodes /media/VD_Research/databases/ncbi/taxonomy/nodes.dmp # NCBI taxonomy nodes dump file
blast_pro_db /media/VD_Research/databases/ncbi/blast/nr/nr # BLAST protein database used for annotation
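# for reference, a sketch of the diamond call this module would build (blastp is assumed here because
# getorf emits protein sequences; file names are illustrative placeholders and the exact output flags
# depend on the diamond version installed):
# diamond blastp -d /media/VD_Research/databases/diamond/diamondnr -q orf_chunk_01.fasta --compress 0 -p 0 -v -k 10 -w 28 --id 0.7 -c 4 -t ~/tmp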
# -------------------
command ray2_assembly
kmer 25 # assembler k-mer
ninst 10 # number of instances for mpiexec
cap 1 # use cap after ray
# cap_options # cap options
map2contigs yes # if "yes" or "1", map reads back onto assembly
bowtie2_options --local -p 10 # only needed if map2contigs is set. options for the mapper (-p specifies how many threads to use)
# explanation: perform an assembly using Ray
parse_contigs yes # if "yes" or "1", parse contigs into reads
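# for reference, with the settings above the assembly step would look roughly like the following
# (read file names are illustrative placeholders):
# mpiexec -n 10 Ray -k 25 -s unmapped_reads.fasta -o ray_output
# and, since map2contigs is on, the reads would then be mapped back with something like:
# bowtie2-build ray_output/Contigs.fasta contigs_index
# bowtie2 --local -p 10 -x contigs_index -U unmapped_reads.fastq -S reads_vs_contigs.sam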
# -------------------
command orf_filter
# filter fasta input by orf
getorf_options -minsize 60 -find 0 # any options other than "-sequence" and "-outseq"
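# for reference, these options would be appended to an EMBOSS getorf call roughly like the following
# (file names are illustrative placeholders):
# getorf -sequence contigs.fasta -outseq orfs.fasta -minsize 60 -find 0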
# -------------------