-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbrongus_user_interface.py
executable file
·151 lines (128 loc) · 8.03 KB
/
brongus_user_interface.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env python
##user_interface
import argparse
import Brongus.motifscorecompiler as msc
import Brongus.motifscorereader as msr
import Brongus.common_assembler as ca
import sys
import Brongus.SGDIDConvertermodule as sgdc
def read_name_list(path):
    """Return the names listed one per line in the text file at ``path``.

    Surrounding whitespace (including the trailing newline) is stripped from
    every line and blank lines are skipped, so each returned entry is a bare
    name rather than a raw line of the file.
    """
    with open(path, "r") as handle:
        stripped = (line.strip() for line in handle)
        return [name for name in stripped if name]


def build_parser():
    """Build and return the argparse parser for the Brongus command line."""
    Brongusparser = argparse.ArgumentParser(
        description="After some genes or transcription factor names of S. cerevisae are entered in, data files containing the Position Weight Matrices of the transcription factor motifs will be scanned against the promoter sequences of the genes in order to generate a list of most likely gene targets or transcription factors will be generated.",
    )
    Brongusparser.add_argument(
        "-fn", "--filename", type=str,
        help="If you entered multiple genes or transcription factors, the please enter the name for the csv file you will generate.",
    )
    Brongusparser.add_argument(
        "-threshold", type=float, default=0.0,
        help="Changing the threshold will change which log-odd scores are included in the array of hits, i.e. which associations between transcription factor and genetic sequence are deemed to be hits. By default, this is set to zero",
    )
    Brongusparser.add_argument(
        "-genetic_sequences", type=str, default=None,
        help="This argument is not to be changed unless the user specifically wants to run the transcription factor motifs against a new assembly of genetic sequences (which must be in a .fasta format and in the Data folder of Brongus). Otherwise, the file promoter_sequences.fasta is always used as the assembly of genetic sequences.",
    )

    # Exactly one input source may be given per run, so the four options
    # live in a required mutually exclusive group.
    tf_or_gene_group = Brongusparser.add_mutually_exclusive_group(required=True)
    tf_or_gene_group.add_argument(
        "-tf", "--transcription_factors", nargs='*', type=str,
        help="Please enter the transcription factor or factors whose top gene targets you wish to analyze. Please write the name of the transcription factor with no blanks and in capital letters.",
    )
    tf_or_gene_group.add_argument(
        "-g", "--genes", nargs='*', type=str,
        help="Please enter the gene or genes in order to analyze the transacription factors most likely to target them. Please write the genes with no blanks and in capital letters.",
    )
    tf_or_gene_group.add_argument(
        "-tf_fn", "--transcription_factor_filename", type=str,
        help="It is also possible to upload a .txt file to the working directory that contains the list of transcription factors to be analyzed. Please format it so that there is one transcription factor per line.",
    )
    tf_or_gene_group.add_argument(
        "-g_fn", "--gene_filename", type=str,
        help="It is also possible to upload a .txt file to the working directory that contains the list of genes to be analyzed. Please format it so that there is one gene per line.",
    )
    return Brongusparser


def _run_analyses(names, threshold, filename, motifs, promoters,
                  scorer_cls, reader, assembler, singular, plural):
    """Score and read every name, then assemble the results if several.

    For each name a fresh ``scorer_cls(motifs, promoters)`` is built and its
    ``compile_scores(name, threshold)`` is called, followed by
    ``reader(name)``.  When more than one name is given,
    ``assembler(names, filename)`` is called once at the end; for a single
    name ``filename`` is ignored.
    """
    count = len(names)
    if count == 1:
        print("Now carrying out 2 analyses for the {} given... Please be patient.".format(singular))
    else:
        # Two steps per name plus the final assembly step.
        print("Now carrying out {} analyses for the {} {} given... Please be patient.".format(
            count * 2 + 1, count, plural))
    # Flush so the progress message shows before the long-running work starts.
    sys.stdout.flush()

    for name in names:
        scorer_cls(motifs, promoters).compile_scores(name, threshold)
        reader(name)
    if count > 1:
        assembler(names, filename)


def main(argv=None):
    """Parse the command line and run the matching Brongus pipeline.

    ``argv`` defaults to ``sys.argv[1:]`` (argparse's behaviour for None).
    Usage problems (no names, unreadable list file, several names without
    ``--filename``) are reported through ``parser.error`` before any data
    is loaded.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    use_tfs = (args.transcription_factors is not None
               or args.transcription_factor_filename is not None)
    cli_names = args.transcription_factors if use_tfs else args.genes
    list_path = args.transcription_factor_filename if use_tfs else args.gene_filename

    if cli_names is not None:
        names = list(cli_names)
    else:
        try:
            names = read_name_list(list_path)
        except (IOError, OSError) as err:
            parser.error("Could not read {}: {}".format(list_path, err))

    if not names:
        parser.error("No genes or transcription factors were provided.")
    if len(names) > 1 and args.filename is None:
        parser.error("You must enter a filename if you provide more than one gene or transcription factor.")

    motifs = msc.MotifDict().motif_dict
    promoters = msc.PromoterSequences(args.genetic_sequences).promoter_sequences

    if args.transcription_factors is not None:
        # NOTE(review): the converted IDs were never used by the original
        # script; the calls are kept in case getgene validates the names.
        # Confirm against SGDIDConvertermodule and either use or drop them.
        for tf in names:
            sgdc.idconverter().getgene(tf)

    if use_tfs:
        _run_analyses(names, args.threshold, args.filename, motifs, promoters,
                      msc.TFCompiledScores, msr.tf_pickle_reader,
                      ca.common_tf_pickle_assembler,
                      "transcription factor", "transcription factors")
    else:
        _run_analyses(names, args.threshold, args.filename, motifs, promoters,
                      msc.GeneCompiledScores, msr.gene_pickle_reader,
                      ca.common_gene_pickle_assembler,
                      "gene", "genes")


if __name__ == "__main__":
    main()