# database_operations.py
import csv
import pandas as pd
import json
from utils import database_operation
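# NOTE (assumption): `database_operation` is expected to open a database
# connection, pass its cursor to the wrapped function, commit and close
# afterwards. A minimal sketch of such a decorator, in case utils.py is not
# available (names like DB_CONFIG are hypothetical):
#
#   import functools
#   import mysql.connector
#
#   def database_operation(func):
#       @functools.wraps(func)
#       def wrapper(*args, **kwargs):
#           cnx = mysql.connector.connect(**DB_CONFIG)
#           try:
#               result = func(cnx.cursor(), *args, **kwargs)
#               cnx.commit()
#               return result
#           finally:
#               cnx.close()
#       return wrapper
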
# DATABASE-RELATED FUNCTIONS
@database_operation
def create_database(cursor):
    # Create tables. Drop the dependent table first (mercats references
    # municipis) and create municipis before mercats so that the foreign key
    # target already exists.
    cursor.execute("DROP TABLE IF EXISTS mercats;")
    cursor.execute("DROP TABLE IF EXISTS populartimes;")
    cursor.execute("DROP TABLE IF EXISTS plots;")
    cursor.execute("DROP TABLE IF EXISTS municipis;")
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS municipis(
            nom VARCHAR(50),
            area TINYINT,
            cens INT,
            dia_mercat VARCHAR(70),
            renda_familiar FLOAT,
            lloguer_mensual FLOAT,
            index_envelliment FLOAT,
            PRIMARY KEY(nom)
        );
    """)
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS mercats(
            id SMALLINT NOT NULL AUTO_INCREMENT UNIQUE,
            nom VARCHAR(100),
            posicio POINT,
            municipi VARCHAR(50),
            PRIMARY KEY(id),
            FOREIGN KEY(municipi) REFERENCES municipis(nom)
        );
    """)
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS populartimes(
            mercat_id SMALLINT NOT NULL,
            day_of_week CHAR(9),
            hour TINYINT,
            value TINYINT,
            timestamp TIMESTAMP
        );
    """)
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS plots(
            id SMALLINT NOT NULL AUTO_INCREMENT UNIQUE,
            plot_type VARCHAR(10),
            added_parameters VARCHAR(50),
            html TEXT,
            timestamp TIMESTAMP,
            PRIMARY KEY(id)
        );
    """)


# 'Insert' functions: they read the data files and push their contents into the database
@database_operation
def insert_mercats(cursor):
    # Expected CSV layout (no header row): nom, latitude, longitude, municipi
    mercats = []
    filename = "data/mercats.csv"
    cursor.execute("DELETE FROM mercats;")
    with open(file=filename, mode='r', encoding='UTF-8') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            mercat_name = row[0]
            mercat_lat = float(row[1])
            mercat_lng = float(row[2])
            mercat_point = f'POINT({mercat_lat} {mercat_lng})'
            mercat_municipi = row[3]
            mercats.append((mercat_name, mercat_point, mercat_municipi))
    query = "INSERT INTO mercats (nom, posicio, municipi) VALUES (%s, ST_GeomFromText(%s), %s);"
    cursor.executemany(query, mercats)


@database_operation
def insert_popular_times(cursor, popular_times):
    query = "INSERT INTO populartimes (mercat_id, day_of_week, hour, value) VALUES (%s, %s, %s, %s);"
    cursor.executemany(query, popular_times)


@database_operation
def insert_municipis(cursor):
    # Expected CSV layout (with header row):
    # nom, cens, dia_mercat, renda_bruta_familiar_disponible, lloguer_mensual, index_envelliment
    filename = "data/municipis.csv"
    cursor.execute("DELETE FROM municipis;")
    with open(file=filename, mode='r', encoding='UTF-8') as csvfile:
        reader = csv.reader(csvfile)
        next(reader)  # Skip header
        values = []
        for row in reader:
            nom = row[0]
            cens = int(row[1])
            dia_mercat = row[2]
            renda_bruta_familiar_disponible = int(row[3])
            # Missing values are stored as 0, so the SQL queries have to skip the zeros
            lloguer_mensual = float(row[4]) if row[4] != '' else 0
            index_envelliment = float(row[5]) if row[5] != '' else 0
            values.append(
                (nom, 1, cens, dia_mercat, renda_bruta_familiar_disponible, lloguer_mensual, index_envelliment))
    query = "INSERT INTO municipis(nom, area, cens, dia_mercat, renda_familiar, lloguer_mensual, index_envelliment) VALUES(%s, %s, %s, %s, %s, %s, %s);"
    cursor.executemany(query, values)


# 'Read' functions: they print the result of a SELECT on each table
@database_operation
def read_mercats(cursor):
    cursor.execute("SELECT * FROM mercats;")
    print(cursor.fetchall())


@database_operation
def read_populartimes(cursor):
    cursor.execute("SELECT * FROM populartimes;")
    print(cursor.fetchall())


@database_operation
def read_municipis(cursor):
    cursor.execute("SELECT * FROM municipis;")
    print(cursor.fetchall())


# 'Load' functions: they fetch data from the database into memory so it can be worked with
@database_operation
def load_mercats(cursor):
    # TODO: CHANGE THIS query
    cursor.execute("SELECT id, nom, ST_X(posicio), ST_Y(posicio) FROM mercats;")
    return cursor.fetchall()


@database_operation
def load_populartimes(cursor):
    cursor.execute("SELECT * FROM populartimes;")
    return cursor.fetchall()


# The following 'load' functions receive an already open connection (note the lack of
# @database_operation), because they are all executed over a single connection during
# the plotting process.
def load_geojson(cnx):
    df = pd.read_sql("""
        SELECT nom, ST_AsGeoJSON(posicio) posicio, municipi, page_id
        FROM mercats mer INNER JOIN mercatspages mp ON mer.id = mp.mercat_id;
        """,
        con=cnx)
    feature_collection = {'type': "FeatureCollection", 'features': []}
    for i in range(len(df.index)):
        point = json.loads(df['posicio'][i])
        feature = {
            'type': "Feature",
            'geometry': point,
            'properties': {
                'nom': df['nom'][i],
                'municipi': df['municipi'][i],
                'page_id': int(df['page_id'][i])
            }
        }
        feature_collection['features'].append(feature)
    return feature_collection
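

# For reference, the returned structure follows the GeoJSON spec; illustrative shape:
# {
#     "type": "FeatureCollection",
#     "features": [
#         {"type": "Feature",
#          "geometry": {"type": "Point", "coordinates": [41.4, 2.1]},
#          "properties": {"nom": "...", "municipi": "...", "page_id": 1}}
#     ]
# }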


def load_avg_popular_times_per_hour_all_time(cnx):
    df = pd.read_sql("""
        SELECT AVG(value) AS valor_mitja
        FROM populartimes
        GROUP BY hour;
        """,
        con=cnx)
    return df


def load_avg_popular_times_per_hour_last_week(cnx):
    df = pd.read_sql("""
        SELECT AVG(value) AS valor_mitja
        FROM populartimes
        WHERE TIMESTAMPDIFF(WEEK, timestamp, CURRENT_TIMESTAMP()) <= 1
        GROUP BY hour;
        """,
        con=cnx)
    return df


def load_avg_all_days_vs_market_days(cnx):
    df1 = load_avg_popular_times_per_hour_all_time(cnx)
    df2 = pd.read_sql("""
        SELECT AVG(pt.value) AS valor_mitja_dies_mercat
        FROM populartimes pt
        INNER JOIN mercats mer
            ON pt.mercat_id = mer.id
        INNER JOIN municipis mun
            ON mer.municipi = mun.nom
        WHERE mun.dia_mercat LIKE CONCAT('%', pt.day_of_week, '%')
        GROUP BY pt.hour;
        """,
        con=cnx)
    return df1.join(df2)


def load_avg_all_days_vs_market_days_no_bcn(cnx):
    df1 = load_avg_popular_times_per_hour_all_time(cnx)
    df2 = pd.read_sql("""
        SELECT AVG(pt.value) AS valor_mitja_dies_mercat
        FROM populartimes pt
        INNER JOIN mercats mer
            ON pt.mercat_id = mer.id
        INNER JOIN municipis mun
            ON mer.municipi = mun.nom
        WHERE mun.dia_mercat LIKE CONCAT('%', pt.day_of_week, '%')
            AND mun.nom != 'Barcelona'
        GROUP BY pt.hour;
        """,
        con=cnx)
    return df1.join(df2)


# FIXME: this query is a mess:
# - it does not return the hours
# - it does not sort the data properly
# - it can return several rows per slot (it takes the rows from the last week
#   rather than the latest value, and it does not average them)
def load_indiv_market_this_week(id_mercat, cnx):
    df = pd.read_sql(f"""
        SELECT value, day_of_week
        FROM populartimes pt
        INNER JOIN mercats mer
            ON pt.mercat_id = mer.id
        WHERE mer.nom = "{id_mercat}"
            AND TIMESTAMPDIFF(WEEK, pt.timestamp, CURRENT_TIMESTAMP()) <= 1
        ORDER BY day_of_week
        LIMIT 126;
        """,
        con=cnx)
    return df
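

# A possible fix (sketch, untested): average the values per (day, hour) slot for
# the last week, following the same pattern as load_indiv_market_all_time2 below.
# This assumes day_of_week holds English day names, which may not match the data:
#
#   SELECT day_of_week, hour, AVG(value) AS value
#   FROM populartimes pt
#   INNER JOIN mercats mer ON pt.mercat_id = mer.id
#   WHERE mer.nom = "{id_mercat}"
#     AND TIMESTAMPDIFF(WEEK, pt.timestamp, CURRENT_TIMESTAMP()) <= 1
#   GROUP BY day_of_week, hour
#   ORDER BY FIELD(day_of_week, 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
#                  'Friday', 'Saturday', 'Sunday'), hour;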


def load_indiv_market_this_week2(id_mercat, cnx):
    df = pd.read_sql(f"""
        SELECT
            `mercat_id`, `day_of_week`, `hour`, `value`, MAX(timestamp)
        FROM
            populartimes
        WHERE
            `mercat_id` = "{id_mercat}"
        GROUP BY
            mercat_id, `day_of_week`, `hour`
        """,
        con=cnx)
    return df


def load_indiv_market_last_week2(id_mercat, cnx):
    df = pd.read_sql(f"""
        SELECT
            `mercat_id`, `day_of_week`, `hour`, `value`, MAX(timestamp)
        FROM
            populartimes
        WHERE
            `mercat_id` = "{id_mercat}" AND
            TIMESTAMPDIFF(WEEK, `timestamp`, CURRENT_TIMESTAMP()) BETWEEN 1 AND 2
        GROUP BY
            mercat_id, `day_of_week`, `hour`
        """,
        con=cnx)
    return df


def load_indiv_market_last_week(id_mercat, cnx):
    df = pd.read_sql(f"""
        SELECT value, day_of_week
        FROM populartimes pt
        INNER JOIN mercats mer
            ON pt.mercat_id = mer.id
        WHERE mer.nom = "{id_mercat}"
            AND TIMESTAMPDIFF(WEEK, pt.timestamp, CURRENT_TIMESTAMP()) BETWEEN 1 AND 2
        LIMIT 168;
        """,
        con=cnx)
    return df


def load_indiv_market_all_time(id_mercat, cnx):
    df = pd.read_sql(f"""
        SELECT value, day_of_week
        FROM populartimes pt
        INNER JOIN mercats mer
            ON pt.mercat_id = mer.id
        WHERE mer.id = "{id_mercat}"
        GROUP BY day_of_week, hour;
        """,
        con=cnx)
    return df


def load_indiv_market_all_time2(id_mercat, cnx):
    df = pd.read_sql(f"""
        SELECT
            `mercat_id`, `day_of_week`, `hour`, AVG(`value`)
        FROM
            populartimes
        WHERE
            `mercat_id` = "{id_mercat}"
        GROUP BY
            mercat_id, `day_of_week`, `hour`
        """,
        con=cnx)
    return df


def load_avg_population_groups(cnx):
    df = pd.read_sql("""
        SELECT AVG(pt.value) value, pt.hour hour,
            CASE
                WHEN mun.cens < 1000 THEN "< 1000"
                WHEN mun.cens BETWEEN 1001 AND 5000 THEN "1001-5000"
                WHEN mun.cens BETWEEN 5001 AND 10000 THEN "5001-10000"
                WHEN mun.cens BETWEEN 10001 AND 50000 THEN "10001-50000"
                WHEN mun.cens BETWEEN 50001 AND 1000000 THEN "50001-1000000"
                WHEN mun.cens > 1000000 THEN "> 1000000"
            END AS tram_poblacio
        FROM populartimes pt
        INNER JOIN mercats mer
            ON pt.mercat_id = mer.id
        INNER JOIN municipis mun
            ON mer.municipi = mun.nom
        GROUP BY tram_poblacio, hour;
        """,
        con=cnx)
    return df


def load_avg_renda_familiar(cnx):
    df = pd.read_sql("""
        SELECT AVG(pt.value) value, pt.hour hour,
            CASE
                WHEN mun.renda_familiar BETWEEN 11000 AND 14000 THEN "11000€-14000€"
                WHEN mun.renda_familiar BETWEEN 14001 AND 17000 THEN "14001€-17000€"
                WHEN mun.renda_familiar BETWEEN 17001 AND 20000 THEN "17001€-20000€"
                WHEN mun.renda_familiar BETWEEN 20001 AND 23000 THEN "20001€-23000€"
                WHEN mun.renda_familiar > 23000 THEN "> 23000€"
            END AS tram_renda_familiar
        FROM populartimes pt
        INNER JOIN mercats mer
            ON pt.mercat_id = mer.id
        INNER JOIN municipis mun
            ON mer.municipi = mun.nom
        GROUP BY tram_renda_familiar, hour;
        """,
        con=cnx)
    return df


def load_avg_renda_familiar_no_bcn(cnx):
    df = pd.read_sql("""
        SELECT AVG(pt.value) value, pt.hour hour,
            CASE
                WHEN mun.renda_familiar BETWEEN 11000 AND 14000 THEN "11000€-14000€"
                WHEN mun.renda_familiar BETWEEN 14001 AND 17000 THEN "14001€-17000€"
                WHEN mun.renda_familiar BETWEEN 17001 AND 20000 THEN "17001€-20000€"
                WHEN mun.renda_familiar BETWEEN 20001 AND 23000 THEN "20001€-23000€"
                WHEN mun.renda_familiar > 23000 THEN "> 23000€"
            END AS tram_renda_familiar
        FROM populartimes pt
        INNER JOIN mercats mer
            ON pt.mercat_id = mer.id
        INNER JOIN municipis mun
            ON mer.municipi = mun.nom
        WHERE mun.nom != 'Barcelona'
        GROUP BY tram_renda_familiar, hour;
        """,
        con=cnx)
    return df


def load_municipi_i_renda_familiar(cnx):
    # Note: the municipis table stores this figure in the renda_familiar column
    df = pd.read_sql("""
        SELECT nom,
            CASE
                WHEN renda_familiar BETWEEN 11000 AND 14000 THEN "11000€-14000€"
                WHEN renda_familiar BETWEEN 14001 AND 17000 THEN "14001€-17000€"
                WHEN renda_familiar BETWEEN 17001 AND 20000 THEN "17001€-20000€"
                WHEN renda_familiar BETWEEN 20001 AND 23000 THEN "20001€-23000€"
                WHEN renda_familiar > 23000 THEN "> 23000€"
            END AS tram_renda_familiar
        FROM municipis
        ORDER BY nom;
        """,
        con=cnx)
    return df


def load_avg_preu_lloguer(cnx):
    df = pd.read_sql("""
        SELECT AVG(pt.value) value, pt.hour hour,
            CASE
                WHEN lloguer_mensual BETWEEN 1 AND 300 THEN "< 300€"
                WHEN lloguer_mensual BETWEEN 301 AND 400 THEN "301€-400€"
                WHEN lloguer_mensual BETWEEN 401 AND 500 THEN "401€-500€"
                WHEN lloguer_mensual BETWEEN 501 AND 700 THEN "501€-700€"
                WHEN lloguer_mensual BETWEEN 701 AND 900 THEN "701€-900€"
                WHEN lloguer_mensual > 901 THEN "> 901€"
            END AS tram_lloguer_mensual
        FROM populartimes pt
        INNER JOIN mercats mer
            ON pt.mercat_id = mer.id
        INNER JOIN municipis mun
            ON mer.municipi = mun.nom
        GROUP BY tram_lloguer_mensual, hour;
        """,
        con=cnx)
    # Skip the leading rows (presumably the NULL bracket produced by municipis
    # whose lloguer_mensual was stored as 0 for missing data)
    return df.loc[18:]


def load_avg_preu_lloguer_no_bcn(cnx):
    df = pd.read_sql("""
        SELECT AVG(pt.value) value, pt.hour hour,
            CASE
                WHEN lloguer_mensual BETWEEN 1 AND 300 THEN "< 300€"
                WHEN lloguer_mensual BETWEEN 301 AND 400 THEN "301€-400€"
                WHEN lloguer_mensual BETWEEN 401 AND 500 THEN "401€-500€"
                WHEN lloguer_mensual BETWEEN 501 AND 700 THEN "501€-700€"
                WHEN lloguer_mensual BETWEEN 701 AND 900 THEN "701€-900€"
                WHEN lloguer_mensual > 901 THEN "> 901€"
            END AS tram_lloguer_mensual
        FROM populartimes pt
        INNER JOIN mercats mer
            ON pt.mercat_id = mer.id
        INNER JOIN municipis mun
            ON mer.municipi = mun.nom
        WHERE mun.nom != 'Barcelona'
        GROUP BY tram_lloguer_mensual, hour;
        """,
        con=cnx)
    return df.loc[18:]


def load_avg_index_envelliment(cnx):
    df = pd.read_sql("""
        SELECT AVG(pt.value) value, pt.hour hour,
            CASE
                WHEN index_envelliment BETWEEN 0 AND 75 THEN "< 75%"
                WHEN index_envelliment BETWEEN 75 AND 100 THEN "75-100%"
                WHEN index_envelliment BETWEEN 100 AND 150 THEN "100-150%"
                WHEN index_envelliment > 150 THEN "> 150%"
            END AS tram_index_envelliment
        FROM populartimes pt
        INNER JOIN mercats mer
            ON pt.mercat_id = mer.id
        INNER JOIN municipis mun
            ON mer.municipi = mun.nom
        GROUP BY tram_index_envelliment, hour;
        """,
        con=cnx)
    return df


def load_avg_index_envelliment_no_bcn(cnx):
    df = pd.read_sql("""
        SELECT AVG(pt.value) value, pt.hour hour,
            CASE
                WHEN index_envelliment BETWEEN 0 AND 75 THEN "< 75%"
                WHEN index_envelliment BETWEEN 75 AND 100 THEN "75-100%"
                WHEN index_envelliment BETWEEN 100 AND 150 THEN "100-150%"
                WHEN index_envelliment > 150 THEN "> 150%"
            END AS tram_index_envelliment
        FROM populartimes pt
        INNER JOIN mercats mer
            ON pt.mercat_id = mer.id
        INNER JOIN municipis mun
            ON mer.municipi = mun.nom
        WHERE mun.nom != 'Barcelona'
        GROUP BY tram_index_envelliment, hour;
        """,
        con=cnx)
    return df


def load_indiv_market_data(cnx):
    df = pd.read_sql("""
        SELECT id, nom
        FROM mercats;
        """,
        con=cnx)
    return df


# 'write_plots' function: takes all the generated plots and writes them to the database
@database_operation
def write_plots(cursor, plots):
    # Back up the current plots before replacing them (assumes a plots_backup
    # table with the same columns already exists).
    cursor.execute(
        "INSERT INTO plots_backup(plot_type, added_parameters, html, timestamp) "
        "SELECT plot_type, added_parameters, html, timestamp FROM plots;")
    cursor.execute("DELETE FROM plots;")
    query = "INSERT INTO plots(plot_type, added_parameters, html) VALUES (%s, %s, %s);"
    cursor.executemany(query, plots)
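

# Minimal usage sketch (assumption: `database_operation` supplies the cursor and
# handles commit/cleanup, so the wrapped functions are called without arguments,
# and the CSV files exist under data/). Illustrative call order only:
if __name__ == "__main__":
    create_database()
    insert_municipis()  # municipis first: mercats has a foreign key to it
    insert_mercats()
    read_mercats()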