app.py
import json
import logging
import os
import pickle
import sys
import webbrowser

import dateparser
import numpy
from pandas import read_csv, DataFrame

import timexseries.data_ingestion
from timexseries.data_ingestion import add_freq, select_timeseries_portion, add_diff_columns
from timexseries.data_prediction import create_timeseries_containers
from timexseries.data_prediction.models.prophet_predictor import FBProphetModel
from timexseries.data_prediction.xcorr import calc_xcorr
from timexseries.timeseries_container import TimeSeriesContainer

log = logging.getLogger(__name__)
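

# This script computes the TIMEX prediction containers for the Italian
# COVID-19 dataset and pickles them to "containers.pkl";
# "app_load_from_dump.py" loads that dump to serve the Dash page without
# re-computing everything.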
def compute():
    param_file_nameJSON = 'configurations/configuration_test_covid19italy.json'

    # Load parameters from the JSON config file.
    with open(param_file_nameJSON) as json_file:
        param_config = json.load(json_file)

    # Logging setup.
    log_level = getattr(logging, param_config["verbose"], None)
    if not isinstance(log_level, int):
        log_level = 0
    # %(name)s for module name
    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=log_level, stream=sys.stdout)
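    # NOTE: "verbose" is expected to hold a logging level name such as "INFO"
    # or "DEBUG"; anything else falls back to 0 (NOTSET, so the root logger
    # processes every message).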

    # Data ingestion.
    log.info("Started data ingestion.")
    ingested_data = timexseries.data_ingestion.ingest_timeseries(param_config)

    # Data selection.
    log.info("Started data selection.")
    ingested_data = select_timeseries_portion(ingested_data, param_config)

    # Custom columns.
    log.info("Adding custom columns.")
    ingested_data["New cases/tests ratio"] = [100 * (cases / tests) for cases, tests in
                                              zip(ingested_data['Daily cases'], ingested_data['Daily tests'])]
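    # "New cases/tests ratio" is the daily positivity rate: new cases as a
    # percentage of the tests performed that day.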

    # Data prediction.
    containers = create_timeseries_containers(ingested_data=ingested_data, param_config=param_config)
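    # `containers` is a list of TimeSeriesContainer objects, one per ingested
    # column; the custom regional series computed below are appended to it.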

    ####################################################################################################################
    # Custom time-series                                                                                      #########
    # If you are studying TIMEX code: you can ignore this.
    log.info("Computing the custom time-series.")

    regions = read_csv("https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni.csv",
                       header=0, index_col=0, usecols=['data', 'denominazione_regione', 'nuovi_positivi', 'tamponi'])
    regions.reset_index(inplace=True)
    regions['data'] = regions['data'].apply(lambda x: dateparser.parse(x))
    regions.set_index(['data', 'denominazione_regione'], inplace=True, drop=True)
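    # "tamponi" (tests) is cumulative in the source data, so derive the
    # day-over-day difference per region before renaming the columns.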
    regions = add_diff_columns(regions, ['tamponi'], group_by='denominazione_regione')
    regions.rename(columns={'nuovi_positivi': 'Daily cases', 'tamponi': 'Tests',
                            "tamponi_diff": "Daily tests"}, inplace=True)
    regions["New cases/tests ratio"] = [100 * (ndc / tamp) if tamp > ndc > 0 else numpy.nan for ndc, tamp in
                                        zip(regions['Daily cases'], regions['Daily tests'])]
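    # The ratio is only meaningful when 0 < daily cases < daily tests; all
    # other rows are set to NaN.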

    # Prediction of "Daily cases" for every region.
    # We also want to plot cross-correlation with other regions, so build a
    # DataFrame with dates as index and one "Daily cases" column per region.
    regions_names = regions.index.get_level_values(1).unique()
    regions_names = regions_names.sort_values()
    datas = regions.index.get_level_values(0).unique().to_list()
    datas = datas[1:]  # Abruzzo is missing the first day.
    cols = ['data'] + regions_names.to_list()
    daily_cases_regions = DataFrame(columns=cols, dtype=numpy.float64)
    daily_cases_regions['data'] = datas
    daily_cases_regions.set_index(['data'], inplace=True, drop=True)

    for col in daily_cases_regions.columns:
        for i in daily_cases_regions.index:
            daily_cases_regions.loc[i, col] = regions.loc[(i, col), 'Daily cases']
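    # NOTE: the nested loop is just an explicit pivot; something like
    # regions['Daily cases'].unstack('denominazione_regione') would build the
    # same wide table (minus the dropped first day).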
    daily_cases_regions = add_freq(daily_cases_regions, 'D')

    max_lags = param_config['xcorr_parameters']['xcorr_max_lags']
    modes = param_config['xcorr_parameters']["xcorr_mode"].split(",")
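    # Threads available to the predictor: prefer an explicit "max_threads" in
    # the config, else the CPUs this process may use; os.sched_getaffinity is
    # Linux-only, hence the final fallback to 1.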
    try:
        max_threads = param_config['max_threads']
    except KeyError:
        try:
            max_threads = len(os.sched_getaffinity(0))
        except AttributeError:
            max_threads = 1

    for region in daily_cases_regions.columns:
        timeseries_data = daily_cases_regions[[region]]
        model_results = {}
        xcorr = calc_xcorr(region, daily_cases_regions, max_lags, modes)

        log.info(f"Computing univariate prediction for {region}...")
        predictor = FBProphetModel(param_config, transformation="none")
        prophet_result = predictor.launch_model(timeseries_data.copy(), max_threads=max_threads)
        model_results['fbprophet'] = prophet_result
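        # Further models could be registered in model_results the same way;
        # an ARIMA variant is sketched below.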
        #
        # predictor = ARIMA(param_config)
        # arima_result = predictor.launch_model(timeseries_data.copy())
        # model_results['arima'] = arima_result

        s = TimeSeriesContainer(timeseries_data, model_results, xcorr)
        containers.append(s)

        # children_for_each_scenario.append({
        #     'name': region,
        #     'children': create_scenario_children(s, param_config)
        # })
    ####################################################################################################################

    # Save the computed data; these are the TimeSeriesContainer objects from
    # which a nice Dash page can be built. They can be loaded by
    # "app_load_from_dump.py" to start the app without re-computing all the data.
    with open("containers.pkl", 'wb') as output_file:
        pickle.dump(containers, output_file)


if __name__ == '__main__':
    compute()

    def open_browser():
        webbrowser.open("http://127.0.0.1:8003")  # match the gunicorn bind below

    # Timer(6, open_browser).start()  # needs `from threading import Timer` if re-enabled
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    os.system("gunicorn -b 0.0.0.0:8003 app_load_from_dump:server")
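    # gunicorn serves the pre-computed dump through app_load_from_dump.py,
    # which exposes the Dash app's WSGI object as module-level "server".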