-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathsetup_test_db.py
408 lines (374 loc) · 24.6 KB
/
setup_test_db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
# flake8: noqa
from sqlalchemy import text
import crawler.config.test as config
from crawler.db.mysql import create_mysql_connection_engine
# Set up a basic MLWH db for testing
"""Drop and recreate required tables."""
print("Initialising the test MySQL MLWH database")
sql_engine = create_mysql_connection_engine(config.WAREHOUSES_RW_CONN_STRING)
create_db = """
CREATE DATABASE IF NOT EXISTS `unified_warehouse_test` /*!40100 DEFAULT CHARACTER SET latin1 */;
"""
drop_table_lh_sample = """
DROP TABLE IF EXISTS `unified_warehouse_test`.`lighthouse_sample`;
"""
drop_table_sample = """
DROP TABLE IF EXISTS `unified_warehouse_test`.`sample`;
"""
drop_table_stock_resource = """
DROP TABLE IF EXISTS `unified_warehouse_test`.`stock_resource`;
"""
drop_table_study = """
DROP TABLE IF EXISTS `unified_warehouse_test`.`study`;
"""
create_table_lh_sample = """
CREATE TABLE `unified_warehouse_test`.`lighthouse_sample` (
`id` bigint NOT NULL AUTO_INCREMENT,
`mongodb_id` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'Auto-generated id from MongoDB',
`root_sample_id` varchar(255) COLLATE utf8_unicode_ci NOT NULL COMMENT 'Id for this sample provided by the Lighthouse lab',
`cog_uk_id` varchar(255) COLLATE utf8_unicode_ci NOT NULL COMMENT 'Consortium-wide id, generated by Sanger on import to LIMS',
`cog_uk_id_unique` tinyint(1) DEFAULT '1' COMMENT 'A flag indicating whether the COG UK ID value should be unique. NULL means no.',
`rna_id` varchar(255) COLLATE utf8_unicode_ci NOT NULL COMMENT 'Lighthouse lab-provided id made up of plate barcode and well',
`plate_barcode` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'Barcode of plate sample arrived in, from rna_id',
`coordinate` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'Well position from plate sample arrived in, from rna_id',
`result` varchar(255) COLLATE utf8_unicode_ci NOT NULL COMMENT 'Covid-19 test result from the Lighthouse lab',
`date_tested_string` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'When the covid-19 test was carried out by the Lighthouse lab',
`date_tested` datetime DEFAULT NULL COMMENT 'date_tested_string in date format',
`source` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'Lighthouse centre that the sample came from',
`lab_id` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'Id of the lab, within the Lighthouse centre',
`ch1_target` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'Target for channel 1',
`ch1_result` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'Result for channel 1',
`ch1_cq` decimal(11,8) DEFAULT NULL COMMENT 'Cq value for channel 1',
`ch2_target` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'Target for channel 2',
`ch2_result` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'Result for channel 2',
`ch2_cq` decimal(11,8) DEFAULT NULL COMMENT 'Cq value for channel 2',
`ch3_target` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'Target for channel 3',
`ch3_result` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'Result for channel 3',
`ch3_cq` decimal(11,8) DEFAULT NULL COMMENT 'Cq value for channel 3',
`ch4_target` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'Target for channel 4',
`ch4_result` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'Result for channel 4',
`ch4_cq` decimal(11,8) DEFAULT NULL COMMENT 'Cq value for channel 4',
`filtered_positive` tinyint(1) DEFAULT NULL COMMENT 'Filtered positive result value',
`filtered_positive_version` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'Filtered positive version',
`filtered_positive_timestamp` datetime DEFAULT NULL COMMENT 'Filtered positive timestamp',
`lh_sample_uuid` varchar(36) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'Sample uuid created in crawler',
`lh_source_plate_uuid` varchar(36) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'Source plate uuid created in crawler',
`created_at` datetime DEFAULT NULL COMMENT 'When this record was inserted',
`updated_at` datetime DEFAULT NULL COMMENT 'When this record was last updated',
`must_sequence` tinyint(1) DEFAULT NULL COMMENT 'PAM provided value whether sample is of high importance',
`preferentially_sequence` tinyint(1) DEFAULT NULL COMMENT 'PAM provided value whether sample is important',
`is_current` tinyint(1) NOT NULL DEFAULT '0' COMMENT 'Identifies if this sample has the most up to date information for the same rna_id',
PRIMARY KEY (`id`),
UNIQUE KEY `index_lighthouse_sample_on_root_sample_id_and_rna_id_and_result` (`root_sample_id`,`rna_id`,`result`),
UNIQUE KEY `index_lighthouse_sample_on_cog_uk_id_and_cog_uk_id_unique` (`cog_uk_id`, `cog_uk_id_unique`),
UNIQUE KEY `index_lighthouse_sample_on_lh_sample_uuid` (`lh_sample_uuid`),
UNIQUE KEY `index_lighthouse_sample_on_mongodb_id` (`mongodb_id`),
KEY `index_lighthouse_sample_on_date_tested` (`date_tested`),
KEY `index_lighthouse_sample_on_filtered_positive` (`filtered_positive`),
KEY `index_lighthouse_sample_on_rna_id` (`rna_id`),
KEY `index_lighthouse_sample_on_plate_barcode_and_created_at` (`plate_barcode`,`created_at`),
KEY `index_lighthouse_sample_on_result` (`result`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci
"""
create_table_sample = """
CREATE TABLE `unified_warehouse_test`.`sample` (
`id_sample_tmp` int(10) unsigned NOT NULL AUTO_INCREMENT COMMENT 'Internal to this database id, value can change',
`id_lims` varchar(10) COLLATE utf8_unicode_ci NOT NULL COMMENT 'LIM system identifier, e.g. CLARITY-GCLP, SEQSCAPE',
`uuid_sample_lims` varchar(36) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'LIMS-specific sample uuid',
`id_sample_lims` varchar(20) COLLATE utf8_unicode_ci NOT NULL COMMENT 'LIMS-specific sample identifier',
`last_updated` datetime NOT NULL COMMENT 'Timestamp of last update',
`recorded_at` datetime NOT NULL COMMENT 'Timestamp of warehouse update',
`deleted_at` datetime DEFAULT NULL COMMENT 'Timestamp of sample deletion',
`created` datetime DEFAULT NULL COMMENT 'Timestamp of sample creation',
`name` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`reference_genome` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`organism` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`accession_number` varchar(50) COLLATE utf8_unicode_ci DEFAULT NULL,
`common_name` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`description` text COLLATE utf8_unicode_ci,
`taxon_id` int(6) unsigned DEFAULT NULL,
`father` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`mother` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`replicate` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`ethnicity` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`gender` varchar(20) COLLATE utf8_unicode_ci DEFAULT NULL,
`cohort` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`country_of_origin` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`geographical_region` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`sanger_sample_id` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`control` tinyint(1) DEFAULT NULL,
`supplier_name` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`public_name` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`sample_visibility` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`strain` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`consent_withdrawn` tinyint(1) NOT NULL DEFAULT '0',
`donor_id` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`phenotype` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'The phenotype of the sample as described in Sequencescape',
`developmental_stage` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'Developmental Stage',
`control_type` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
PRIMARY KEY (`id_sample_tmp`),
UNIQUE KEY `index_sample_on_id_sample_lims_and_id_lims` (`id_sample_lims`,`id_lims`),
UNIQUE KEY `sample_uuid_sample_lims_index` (`uuid_sample_lims`),
KEY `sample_accession_number_index` (`accession_number`),
KEY `sample_name_index` (`name`)
) ENGINE=InnoDB AUTO_INCREMENT=4925703 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
"""
create_table_stock_resource = """
CREATE TABLE `unified_warehouse_test`.`stock_resource` (
`id_stock_resource_tmp` int(11) NOT NULL AUTO_INCREMENT,
`last_updated` datetime NOT NULL COMMENT 'Timestamp of last update',
`recorded_at` datetime NOT NULL COMMENT 'Timestamp of warehouse update',
`created` datetime NOT NULL COMMENT 'Timestamp of initial registration of stock in LIMS',
`deleted_at` datetime DEFAULT NULL COMMENT 'Timestamp of initial registration of deletion in parent LIMS. NULL if not deleted.',
`id_sample_tmp` int(10) unsigned NOT NULL COMMENT 'Sample id, see "sample.id_sample_tmp"',
`id_study_tmp` int(10) unsigned NOT NULL COMMENT 'Sample id, see "study.id_study_tmp"',
`id_lims` varchar(10) COLLATE utf8_unicode_ci NOT NULL COMMENT 'LIM system identifier',
`id_stock_resource_lims` varchar(20) COLLATE utf8_unicode_ci NOT NULL COMMENT 'Lims specific identifier for the stock',
`stock_resource_uuid` varchar(36) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'Uuid identifier for the stock',
`labware_type` varchar(255) COLLATE utf8_unicode_ci NOT NULL COMMENT 'The type of labware containing the stock. eg. Well, Tube',
`labware_machine_barcode` varchar(255) COLLATE utf8_unicode_ci NOT NULL COMMENT 'The barcode of the containing labware as read by a barcode scanner',
`labware_human_barcode` varchar(255) COLLATE utf8_unicode_ci NOT NULL COMMENT 'The barcode of the containing labware in human readable format',
`labware_coordinate` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'For wells, the coordinate on the containing plate. Null for tubes.',
`current_volume` float DEFAULT NULL COMMENT 'The current volume of material in microlitres based on measurements and know usage',
`initial_volume` float DEFAULT NULL COMMENT 'The result of the initial volume measurement in microlitres conducted on the material',
`concentration` float DEFAULT NULL COMMENT 'The concentration of material recorded in the lab in nanograms per microlitre',
`gel_pass` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'The recorded result for the qel QC assay.',
`pico_pass` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'The recorded result for the pico green assay. A pass indicates a successful assay, not sufficient material.',
`snp_count` int(11) DEFAULT NULL COMMENT 'The number of markers detected in genotyping assays',
`measured_gender` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'The gender call base on the genotyping assay',
PRIMARY KEY (`id_stock_resource_tmp`),
KEY `fk_stock_resource_to_sample` (`id_sample_tmp`),
KEY `fk_stock_resource_to_study` (`id_study_tmp`),
KEY `composition_lookup_index` (`id_stock_resource_lims`,`id_sample_tmp`,`id_lims`),
CONSTRAINT `fk_stock_resource_to_sample` FOREIGN KEY (`id_sample_tmp`) REFERENCES `sample` (`id_sample_tmp`),
CONSTRAINT `fk_stock_resource_to_study` FOREIGN KEY (`id_study_tmp`) REFERENCES `study` (`id_study_tmp`)
) ENGINE=InnoDB AUTO_INCREMENT=4656364 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
"""
create_table_study = """
CREATE TABLE `unified_warehouse_test`.`study` (
`id_study_tmp` int(10) unsigned NOT NULL AUTO_INCREMENT COMMENT 'Internal to this database id, value can change',
`id_lims` varchar(10) COLLATE utf8_unicode_ci NOT NULL COMMENT 'LIM system identifier, e.g. GCLP-CLARITY, SEQSCAPE',
`uuid_study_lims` varchar(36) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'LIMS-specific study uuid',
`id_study_lims` varchar(20) COLLATE utf8_unicode_ci NOT NULL COMMENT 'LIMS-specific study identifier',
`last_updated` datetime NOT NULL COMMENT 'Timestamp of last update',
`recorded_at` datetime NOT NULL COMMENT 'Timestamp of warehouse update',
`deleted_at` datetime DEFAULT NULL COMMENT 'Timestamp of study deletion',
`created` datetime DEFAULT NULL COMMENT 'Timestamp of study creation',
`name` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`reference_genome` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`ethically_approved` tinyint(1) DEFAULT NULL,
`faculty_sponsor` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`state` varchar(50) COLLATE utf8_unicode_ci DEFAULT NULL,
`study_type` varchar(50) COLLATE utf8_unicode_ci DEFAULT NULL,
`abstract` text COLLATE utf8_unicode_ci,
`abbreviation` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`accession_number` varchar(50) COLLATE utf8_unicode_ci DEFAULT NULL,
`description` text COLLATE utf8_unicode_ci,
`contains_human_dna` tinyint(1) DEFAULT NULL COMMENT 'Lane may contain human DNA',
`contaminated_human_dna` tinyint(1) DEFAULT NULL COMMENT 'Human DNA in the lane is a contaminant and should be removed',
`data_release_strategy` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`data_release_sort_of_study` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`ena_project_id` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`study_title` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`study_visibility` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`ega_dac_accession_number` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`array_express_accession_number` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`ega_policy_accession_number` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`data_release_timing` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`data_release_delay_period` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`data_release_delay_reason` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`remove_x_and_autosomes` tinyint(1) NOT NULL DEFAULT '0',
`aligned` tinyint(1) NOT NULL DEFAULT '1',
`separate_y_chromosome_data` tinyint(1) NOT NULL DEFAULT '0',
`data_access_group` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`prelim_id` varchar(20) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'The preliminary study id prior to entry into the LIMS',
`hmdmc_number` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'The Human Materials and Data Management Committee approval number(s) for the study.',
`data_destination` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'The data destination type(s) for the study. It could be ''standard'', ''14mg'' or ''gseq''. This may be extended, if Sanger gains more external customers. It can contain multiply destinations separated by a space.',
`s3_email_list` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`data_deletion_period` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
PRIMARY KEY (`id_study_tmp`),
UNIQUE KEY `study_id_lims_id_study_lims_index` (`id_lims`,`id_study_lims`),
UNIQUE KEY `study_uuid_study_lims_index` (`uuid_study_lims`),
KEY `study_accession_number_index` (`accession_number`),
KEY `study_name_index` (`name`)
) ENGINE=InnoDB AUTO_INCREMENT=6148 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
"""
with sql_engine.connect() as connection:
connection.execute(text(create_db))
print("*** Dropping table LIGHTHOUSE SAMPLE ***")
connection.execute(text(drop_table_lh_sample))
print("*** Dropping table STOCK RESOURCE ***")
connection.execute(text(drop_table_stock_resource))
print("*** Dropping table STUDY ***")
connection.execute(text(drop_table_study))
print("*** Dropping table SAMPLE ***")
connection.execute(text(drop_table_sample))
print("*** Creating table SAMPLE ***")
connection.execute(text(create_table_sample))
print("*** Creating table STUDY ***")
connection.execute(text(create_table_study))
print("*** Creating table STOCK RESOURCE ***")
connection.execute(text(create_table_stock_resource))
print("*** Creating table LIGHTHOUSE SAMPLE ***")
connection.execute(text(create_table_lh_sample))
print("Initialising the test MySQL events warehouse database")
create_db = """
CREATE DATABASE IF NOT EXISTS `event_warehouse_test` /*!40100 DEFAULT CHARACTER SET latin1 */;
"""
drop_table_subjects = """
DROP TABLE IF EXISTS `event_warehouse_test`.`subjects`;
"""
drop_table_roles = """
DROP TABLE IF EXISTS `event_warehouse_test`.`roles`;
"""
drop_table_events = """
DROP TABLE IF EXISTS `event_warehouse_test`.`events`;
"""
drop_table_event_types = """
DROP TABLE IF EXISTS `event_warehouse_test`.`event_types`;
"""
drop_table_subject_types = """
DROP TABLE IF EXISTS `event_warehouse_test`.`subject_types`;
"""
drop_view_cherrypicked_samples = """
DROP VIEW IF EXISTS `unified_warehouse_test`.`cherrypicked_samples`;
"""
drop_table_role_types = """
DROP TABLE IF EXISTS `event_warehouse_test`.`role_types`;
"""
create_table_subjects = """
CREATE TABLE `event_warehouse_test`.`subjects` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`uuid` binary(16) NOT NULL COMMENT 'A binary encoded UUID use HEX(uuid) to retrieve the original (minus dashes)',
`friendly_name` varchar(255) COLLATE utf8_unicode_ci NOT NULL COMMENT 'A user readable identifier for the subject',
`subject_type_id` int(11) NOT NULL COMMENT 'References the event type',
`created_at` datetime DEFAULT NULL,
`updated_at` datetime DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `index_subjects_on_uuid` (`uuid`) USING BTREE,
KEY `index_subjects_on_friendly_name` (`friendly_name`) USING BTREE,
KEY `fk_rails_b7f2e355a0` (`subject_type_id`) USING BTREE,
CONSTRAINT `fk_rails_b7f2e355a0` FOREIGN KEY (`subject_type_id`) REFERENCES `subject_types` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=4198465 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
"""
create_table_roles = """
CREATE TABLE `event_warehouse_test`.`roles` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`event_id` int(11) NOT NULL COMMENT 'Associate with the event (what happened)',
`subject_id` int(11) NOT NULL COMMENT 'Associate with the subject (what it happened to, or what might care)',
`role_type_id` int(11) NOT NULL COMMENT 'References the role_types table, describing the role',
`created_at` datetime DEFAULT NULL,
`updated_at` datetime DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `index_roles_on_event_id` (`event_id`) USING BTREE,
KEY `fk_rails_df614e5484` (`role_type_id`) USING BTREE,
KEY `index_roles_on_subject_id` (`subject_id`) USING BTREE,
CONSTRAINT `fk_rails_42eade4dd3` FOREIGN KEY (`subject_id`) REFERENCES `subjects` (`id`),
CONSTRAINT `fk_rails_df614e5484` FOREIGN KEY (`role_type_id`) REFERENCES `role_types` (`id`),
CONSTRAINT `fk_rails_e0c7d3e302` FOREIGN KEY (`event_id`) REFERENCES `events` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=51090114 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
"""
create_table_events = """
CREATE TABLE `event_warehouse_test`.`events` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`lims_id` varchar(255) COLLATE utf8_unicode_ci NOT NULL COMMENT 'Identifier for the originating LIMS. eg. SQSCP for Sequencesacape',
`uuid` binary(16) NOT NULL COMMENT 'A binary encoded UUID use HEX(uuid) to retrieve the original (minus dashes)',
`event_type_id` int(11) NOT NULL COMMENT 'References the event type',
`occured_at` datetime NOT NULL COMMENT 'The time at which the event was recorded as happening. Other timestamps record when the event entered the database',
`user_identifier` varchar(255) COLLATE utf8_unicode_ci NOT NULL,
`created_at` datetime DEFAULT NULL,
`updated_at` datetime DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `index_events_on_uuid` (`uuid`) USING BTREE,
KEY `fk_rails_75f14fef31` (`event_type_id`) USING BTREE,
CONSTRAINT `fk_rails_75f14fef31` FOREIGN KEY (`event_type_id`) REFERENCES `event_types` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1268003 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
"""
create_table_event_types = """
CREATE TABLE `event_warehouse_test`.`event_types` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`key` varchar(255) COLLATE utf8_unicode_ci NOT NULL COMMENT 'The identifier for the event',
`description` text COLLATE utf8_unicode_ci NOT NULL COMMENT 'A description of the meaning of the event',
`created_at` datetime DEFAULT NULL,
`updated_at` datetime DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `index_event_types_on_key` (`key`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=51468 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
"""
create_table_subject_types = """
CREATE TABLE `event_warehouse_test`.`subject_types` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`key` varchar(255) COLLATE utf8_unicode_ci NOT NULL COMMENT 'The identifier for the role type',
`description` text COLLATE utf8_unicode_ci NOT NULL COMMENT 'A description of the subject type',
`created_at` datetime DEFAULT NULL,
`updated_at` datetime DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `index_subject_types_on_key` (`key`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=15 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
"""
create_table_role_types = """
CREATE TABLE `event_warehouse_test`.`role_types` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`key` varchar(255) COLLATE utf8_unicode_ci NOT NULL COMMENT 'The identifier for the role type',
`description` text COLLATE utf8_unicode_ci NOT NULL COMMENT 'A description of the role',
`created_at` datetime DEFAULT NULL,
`updated_at` datetime DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `index_role_types_on_key` (`key`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=17 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
"""
create_cherrypicked_samples_view = """
CREATE VIEW `unified_warehouse_test`.`cherrypicked_samples` AS
SELECT mlwh_sample.description AS "root_sample_id", mlwh_stock_resource.labware_human_barcode AS "plate_barcode",
mlwh_sample.phenotype AS "phenotype", mlwh_stock_resource.labware_coordinate AS "coordinate",
mlwh_sample.created AS "created", "Tecan" as "robot_type"
FROM unified_warehouse_test.sample AS mlwh_sample
JOIN unified_warehouse_test.stock_resource AS mlwh_stock_resource ON (mlwh_sample.id_sample_tmp = mlwh_stock_resource.id_sample_tmp)
JOIN event_warehouse_test.subjects mlwh_events_subjects ON (mlwh_events_subjects.friendly_name = mlwh_sample.sanger_sample_id)
JOIN event_warehouse_test.roles mlwh_events_roles ON (mlwh_events_roles.subject_id = mlwh_events_subjects.id)
JOIN event_warehouse_test.events mlwh_events_events ON (mlwh_events_roles.event_id = mlwh_events_events.id)
JOIN event_warehouse_test.event_types mlwh_events_event_types ON (mlwh_events_events.event_type_id = mlwh_events_event_types.id)
WHERE mlwh_events_event_types.key = "cherrypick_layout_set"
UNION
SELECT mlwh_sample.description AS "root_sample_id", mlwh_lh_sample.plate_barcode AS "plate_barcode",
mlwh_sample.phenotype AS "phenotype", mlwh_lh_sample.coordinate AS "coordinate", mlwh_sample.created AS "created",
"Beckman" as "robot_type"
FROM unified_warehouse_test.sample as mlwh_sample
JOIN unified_warehouse_test.lighthouse_sample AS mlwh_lh_sample ON (mlwh_sample.uuid_sample_lims = mlwh_lh_sample.lh_sample_uuid)
JOIN event_warehouse_test.subjects AS mlwh_events_subjects ON (mlwh_events_subjects.uuid = UNHEX(REPLACE(mlwh_lh_sample.lh_sample_uuid, '-', '')))
JOIN event_warehouse_test.roles AS mlwh_events_roles ON (mlwh_events_roles.subject_id = mlwh_events_subjects.id)
JOIN event_warehouse_test.events AS mlwh_events_events ON (mlwh_events_events.id = mlwh_events_roles.event_id)
JOIN event_warehouse_test.event_types AS mlwh_events_event_types ON (mlwh_events_event_types.id = mlwh_events_events.event_type_id)
WHERE mlwh_events_event_types.key = "lh_beckman_cp_destination_created"
"""
with sql_engine.connect() as connection:
connection.execute(text(create_db))
print("*** Dropping view CHERRYPICKED SAMPLES ***")
connection.execute(text(drop_view_cherrypicked_samples))
print("*** Dropping table ROLES ***")
connection.execute(text(drop_table_roles))
print("*** Dropping table ROLE TYPES ***")
connection.execute(text(drop_table_role_types))
print("*** Dropping table EVENTS ***")
connection.execute(text(drop_table_events))
print("*** Dropping table EVENT TYPES ***")
connection.execute(text(drop_table_event_types))
print("*** Dropping table SUBJECT ***")
connection.execute(text(drop_table_subjects))
print("*** Dropping table SUBJECT TYPES ***")
connection.execute(text(drop_table_subject_types))
print("*** Creating table SUBJECT TYPES ***")
connection.execute(text(create_table_subject_types))
print("*** Creating table SUBJECTS ***")
connection.execute(text(create_table_subjects))
print("*** Creating table EVENT TYPES ***")
connection.execute(text(create_table_event_types))
print("*** Creating table EVENTS ***")
connection.execute(text(create_table_events))
print("*** Creating table ROLE TYPES ***")
connection.execute(text(create_table_role_types))
print("*** Creating table ROLES ***")
connection.execute(text(create_table_roles))
print("*** Creating view CHERRYPICKED SAMPLES ***")
connection.execute(text(create_cherrypicked_samples_view))
print("Done")