"""DRN_pp.py

For each lead time, run the DRN model (``drn`` / ``drn_pp``) several times,
average the repetitions' predictions, write the averaged train and test
predictions to feather files, and log the CRPS scores and processing times.
"""
import pandas as pd
import numpy as np
import tensorflow as tf
from data import normalize, get_fcst_data_leadtime, get_orog_data, preprocess_data_train, preprocess_data_test
from model.drn_model import drn, drn_pp
from model.crps_function import crps_cost_function, crps_normal
# Path prefix handed to get_fcst_data_leadtime() and also used as the prefix
# of the per-lead-time prediction files that main() writes.
feather_path = './data-feather/'

# Target of the scores and timings that main() prints with `file=f`.
# Assign None instead to let print() fall back to standard output.
f = open('./model_training.txt', mode='w')
def _ensemble_mean(member_outputs):
    """Average the outputs of all repetitions into one ensemble prediction.

    Args:
        member_outputs: One entry per repetition. Stacked, they must form a
            3-D array whose last axis holds the mean at index 0 and the
            standard deviation at index 1.

    Returns:
        The mean over the repetition axis, computed after the
        standard-deviation column has been replaced by its absolute value.
    """
    stacked = np.array(member_outputs)
    # Make sure std is positive before averaging.
    stacked[:, :, 1] = np.abs(stacked[:, :, 1])
    return np.mean(stacked, 0)


def main():
    """Train, score and store DRN predictions for every lead time.

    For each of the 21 lead times this function:

    1. loads and preprocesses the train/test data together with the
       orography table,
    2. normalizes the first nine feature columns, reusing the training
       shift/scale for the test set,
    3. calls ``drn_pp`` ``nreps`` times and collects scores, timings and
       predictions,
    4. averages the repetitions and writes the train re-predictions and test
       predictions to feather files under ``feather_path``,
    5. logs the ensemble CRPS, per-repetition losses and timings to ``f``.
    """
    for n_leadtime in range(21):
        train_data, test_data = get_fcst_data_leadtime(path=feather_path, leadtime=n_leadtime)
        model_orog = get_orog_data(path='./implementation-data/model_orog.csv')

        (train_features_raw, train_targets, train_IDs,
         train_lu, train_info) = preprocess_data_train(train_data, model_orog)
        (test_features_raw, test_targets, test_IDs,
         test_lu, test_info, leadtime) = preprocess_data_test(test_data, model_orog)

        # Normalize the first nine feature columns. The test set is scaled
        # with the shift/scale obtained from the training set.
        train_features = train_features_raw.copy()
        test_features = test_features_raw.copy()
        train_features[:, :9], train_shift, train_scale = normalize(
            train_features_raw[:, :9], method="MAX")
        test_features[:, :9] = normalize(
            test_features_raw[:, :9], shift=train_shift, scale=train_scale)[0]

        n_features = train_features.shape[1]

        # Embedding sizes and largest index seen in train or test for the
        # ID and "lu" inputs of the model.
        emb_size_id = 2
        max_id = int(np.max([train_IDs.max(), test_IDs.max()]))
        emb_size_lu = 4
        max_lu = int(np.max([train_lu.max(), test_lu.max()]))

        nreps = 10
        trn_scores = []
        test_scores = []
        preds = []
        repred = []
        trn_times = []
        test_times = []

        # NOTE(review): this single model object is passed to every
        # repetition below. Unless drn_pp re-initialises its weights, each
        # repetition continues training the previous one instead of being an
        # independently trained ensemble member -- confirm against drn_pp.
        model = drn(n_features, max_id, emb_size_id, max_lu, emb_size_lu)
        data = [train_features, train_IDs, train_lu, train_targets,
                test_features, test_IDs, test_lu, test_targets]
        loss_fn = crps_cost_function
        # Early stopping watches the training loss ('loss').
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='loss', min_delta=0.005, patience=2, restore_best_weights=True)

        # training multiple models in a loop
        for i in range(nreps):
            print(f'Repetition {i}')
            (training_time, predicting_time, reprediction,
             prediction, training_score, test_score) = drn_pp(model, data, loss_fn, early_stopping)
            trn_scores.append(training_score)
            test_scores.append(test_score)
            preds.append(prediction)
            repred.append(reprediction)
            trn_times.append(training_time)
            test_times.append(predicting_time)

        mean_preds = _ensemble_mean(preds)
        mean_repred = _ensemble_mean(repred)

        # NOTE(review): pd.concat(axis=1) aligns rows on the index; this
        # assumes train_info/test_info carry a default 0..n-1 index -- confirm.
        column_names = {0: 't2m_mean', 1: 't2m_std'}
        train_combine = pd.concat(
            [train_info, pd.DataFrame(mean_repred)], axis=1).rename(columns=column_names)
        test_combine = pd.concat(
            [test_info, pd.DataFrame(mean_preds)], axis=1).rename(columns=column_names)

        train_combine.reset_index().to_feather(
            f'{feather_path}orog_repred_leadtime{n_leadtime}.feather')
        test_combine.reset_index().to_feather(
            f'{feather_path}orog_pred_leadtime{n_leadtime}.feather')

        # evaluate ensemble of models
        ens_score = crps_normal(mean_preds[:, 0], mean_preds[:, 1], test_targets).mean()
        print(f'Lead time = {leadtime}; \nEnsemble test score = {ens_score}', file=f)
        print(f'\nInformation on the {nreps} repetitions of the DRN model:', file=f)
        print(f'\nTraining losses: {trn_scores}; \nTest losses: {test_scores}', file=f)
        # Flush once per lead time so finished results reach the log even if
        # a later lead time fails or the run is interrupted.
        print(f'\nProcessing time of training (in seconds): {trn_times}; '
              f'\nProcessing time of predicting (in seconds): {test_times}',
              file=f, flush=True)
if __name__ == '__main__':
    # execute main function only if script is called as the __main__ script
    try:
        main()
    finally:
        # Close the log file even when main() raises, so that whatever was
        # already printed to it is written out and the handle is released.
        if f is not None:
            f.close()