forked from UppsalaGenomeCenter/CanvasDB
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcanvasDB_setup.R
215 lines (172 loc) · 7.42 KB
/
canvasDB_setup.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
############################################################################
##
## File: canvasDB_setup.R
##
## Author: Adam Ameur, Uppsala Genome Center
##
## Description: Functions for building and populating a MySQL database with
## SNP and indel data from high-throughput sequencing experinents.
##
############################################################################
## Easy way to set up the MySQL connection.
## file - a .my.cnf file
## group - an appropriate group within the .my.cnf file
getDBCon_file <- function(file, group)
{
require(RMySQL)
drv <- dbDriver("MySQL")
dbConnect(drv, default.file = file, group = group)
}
## Error controlled version of dbGetQuery
dbGetQuery_E <- function(con,q,TALK = FALSE)
{
if(TALK)
cat(q,"\n")
err <- try(tmp <- dbGetQuery(con, q) ,silent = FALSE)
if(inherits(err,"try-error"))
{
dbDisconnect(con)
stop("Failed to execute SQL-query")
}
if(is.null(tmp))
tmp <- data.frame()
return(tmp)
}
## Returns a db connection
connectToInhouseDB <- function(db.name=indbDatabaseName){
mysqlfile <- paste(rootDir,mysqlConfigFile,sep="")
con <- getDBCon_file(mysqlfile, db.name)
return(con)
}
## Creates an empty db
initDB <- function(TALK=FALSE){
con <- connectToInhouseDB()
## Create run info table
run.table <- dbTables[["run"]]
query <- paste("DROP TABLE IF EXISTS ",run.table, sep="")
tmp <- dbGetQuery_E(con, query, TALK=TALK)
query <- paste("CREATE TABLE ",run.table,
"(sample_id int(10), ",
"canvas_id varchar(20), ",
"library_type varchar(20), ",
"read_type varchar(20), ",
"capture_method varchar(100), ",
"seq_platform varchar(20), ",
"instrument_name varchar(20), ",
"file_format varchar(20), ",
"total_reads int(20), ",
"reads_on_target int(20), ",
"run_start date, ",
"PRIMARY KEY (sample_id)",
") ENGINE=",mysqlEngine," DEFAULT CHARSET=latin1",sep="")
tmp <- dbGetQuery_E(con,query,TALK=TALK)
## Create sample table
sample.table <- dbTables[["sample"]]
query <- paste("DROP TABLE IF EXISTS ",sample.table, sep="")
tmp <- dbGetQuery_E(con, query, TALK=TALK)
query <- paste("CREATE TABLE ",sample.table,
"(sample_id int(10) NOT NULL AUTO_INCREMENT , ",
"canvas_id varchar(20), ",
"sample_name varchar(50), ",
"gender varchar(10), ",
"phenotypes varchar(200), ",
"comments varchar(255), ",
"principal_investigator varchar(20), ",
"PRIMARY KEY (sample_id)",
") ENGINE=",mysqlEngine," DEFAULT CHARSET=latin1",sep="")
tmp <- dbGetQuery_E(con,query,TALK=TALK)
## Create SNP summary table
SNPsummary.table <- dbTables[["SNP.summary"]]
dbSNP.table <- dbTables[["dbSNP"]]
dbSNPcommon.table <- dbTables[["dbSNPcommon"]]
query <- paste("DROP TABLE IF EXISTS ",SNPsummary.table, sep="")
tmp <- dbGetQuery_E(con, query, TALK=TALK)
query <- paste("CREATE TABLE ",SNPsummary.table,
"(SNP_id varchar(20), ",
"chr varchar(10), ",
"pos integer(11), ",
"ref varchar(1), ",
"alt varchar(1), ",
"nr_samples integer(6),",
"samples blob,",
"snp",dbSNPversion," varchar(20), ",
# "class varchar(40), ",
"severity integer(2), ",
# "gene varchar(100), ",
# "details varchar(200), ",
# "sift varchar(10), ",
"PRIMARY KEY(SNP_id)",
") ENGINE=",mysqlEngine," DEFAULT CHARSET=latin1",sep="")
tmp <- dbGetQuery_E(con,query,TALK=TALK)
# query <- paste("CREATE INDEX summary_idx ON ",SNPsummary.table," (nr_samples, snp",dbSNPversion,"common, snp",dbSNPversion,", severity);",sep="")
query <- paste("CREATE INDEX summary_idx ON ",SNPsummary.table," (nr_samples, snp",dbSNPversion,");",sep="")
tmp <- dbGetQuery_E(con,query,TALK=TALK)
query <- paste("CREATE INDEX chrpos ON ",SNPsummary.table," (chr, pos);",sep="")
tmp <- dbGetQuery_E(con,query,TALK=TALK)
indelSummary.table <- dbTables[["indel.summary"]]
query <- paste("DROP TABLE IF EXISTS ",indelSummary.table, sep="")
tmp <- dbGetQuery_E(con, query, TALK=TALK)
query <- paste("CREATE TABLE ",indelSummary.table,
"(indel_id varchar(500), ",
"chr varchar(10), ",
"start integer(11), ",
"end integer(11), ",
"ref varchar(200), ",
"alt varchar(200), ",
"type varchar(100), ",
"size integer(10), ",
"nr_samples integer(6),",
"samples blob,",
"snp",dbSNPversion," varchar(20), ",
# "class varchar(40), ",
"severity integer(2), ",
# "gene varchar(100), ",
# "details varchar(200), ",
"PRIMARY KEY(indel_id)",
") ENGINE=",mysqlEngine," DEFAULT CHARSET=latin1",sep="")
tmp <- dbGetQuery_E(con,query,TALK=TALK)
#query <- paste("CREATE INDEX summary_idx ON ",indelSummary.table," (nr_samples, snp",dbSNPversion,"common, snp",dbSNPversion,", severity);",sep="")
query <- paste("CREATE INDEX summary_idx ON ",indelSummary.table," (nr_samples, snp",dbSNPversion,");",sep="")
tmp <- dbGetQuery_E(con,query,TALK=TALK)
query <- paste("CREATE INDEX chrpos ON ",indelSummary.table," (chr, start, end);",sep="")
tmp <- dbGetQuery_E(con,query,TALK=TALK)
## Drop SNP data tables
SNPtablesExists <- TRUE
while(SNPtablesExists){
query <- paste("SELECT GROUP_CONCAT(table_name) FROM information_schema.tables WHERE table_schema = '",indbDatabaseName,"' AND table_name like 'snp_data_%';",sep="")
tmp <- dbGetQuery_E(con,query,TALK=TALK)
SNPdataTables <- strsplit(tmp[,1],",")[[1]]
if(length(SNPdataTables)==1){
if(is.na(SNPdataTables)){
SNPtablesExists <- FALSE
}
}
if(SNPtablesExists){
for(SNPdataTable in SNPdataTables){
print(SNPdataTable)
query <- paste("DROP TABLE IF EXISTS ",SNPdataTable,sep="")
tmp <- dbGetQuery_E(con,query, TALK=TALK)
}
}
}
## Drop indel data tables
indelTablesExists <- TRUE
while(indelTablesExists){
query <- paste("SELECT GROUP_CONCAT(table_name) FROM information_schema.tables WHERE table_schema = '",indbDatabaseName,"' AND table_name like 'indel_data_%';",sep="")
tmp <- dbGetQuery_E(con,query,TALK=TALK)
indelDataTables <- strsplit(tmp[,1],",")[[1]]
if(length(indelDataTables)==1){
if(is.na(indelDataTables)){
indelTablesExists <- FALSE
}
}
if(indelTablesExists){
for(indelDataTable in indelDataTables){
print(indelDataTable)
query <- paste("DROP TABLE IF EXISTS ",indelDataTable,sep="")
tmp <- dbGetQuery_E(con,query, TALK=TALK)
}
}
}
dbDisconnect(con)
}