-
Notifications
You must be signed in to change notification settings - Fork 0
/
09_make_script_for_generate_training_data.py
128 lines (87 loc) · 4.1 KB
/
09_make_script_for_generate_training_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""
Wraps the script to generate training data given a total
number of light curves as a first command line argument
and the number of batches to split it up into as a second
command line argument (for parallelization).
Ekaterina Ilin
MIT License (2022)
"""
from datetime import datetime
import sys
from flares.__init__ import (LOG_DATA_OVERVIEW_PATH,
SCIPT_NAME_GENERATE_DATA,
)
# Available ED-decomposition strategies, passed by name to the data-generation
# script; index 1 (UCDs + Davenport) is selected below in the main block.
DECOMPFUNCS = ["decompose_ed_randomly_and_using_Davenport",
               "decompose_ed_from_UCDs_and_Davenport"]
if __name__ == "__main__":
today = datetime.now().strftime("%Y_%m_%d_%H_%M")
# quadratic limd darkening
u_ld = [0.5079, 0.2239]
# size of light curve
size_lc = 2000
# min and max energy of flares in ED space
emin, emax = 1e-1, 1e6
# min and max powerlaw exponent
alphamin, alphamax = 1.5, 1.5
# min and max number of flares per lc
betamin, betamax = 10,20
# numbers of spots
n_spots_min, n_spots_max = 1,1
# Gaussian noise level
errval = 5e-12
# pick a small but not too small flaring region size
spot_radius = 0.01
# pick a latitude width to scatter the spots around a bit
latwidth = 5
# total number of light curves given from command line
n_lcs = int(sys.argv[1])
# choose random mid latitude or fix it
midlat = "random"
# choose decomposition function
decomposeed = DECOMPFUNCS[1]
# inputs string for log file
inputs = (f"{u_ld[0]},{u_ld[1]},{emin},{emax},{alphamin},"
f"{alphamax},{betamin},{betamax},{size_lc},"
f"{errval},{spot_radius},{midlat},{latwidth},"
f"{n_spots_min},{n_spots_max},{decomposeed}")
# how many batches
batches = int(sys.argv[2])
# cleaning string that removes all row that are headers except for the first row inplace
clean_header = "sed '1!{/^istart,istop,tstart,tstop,ed_rec,ed_rec_err,ampl_rec,dur,total_n_valid_data_points,midlat_deg,inclination_deg,n_spots,beta_1,alpha_1,lon_deg_1,lat_deg_1,starid/d;}' -i"
# "sed '1!{/^istart,istop,tstart,tstop,ed_rec,ed_rec_err,ampl_rec,dur,total_n_valid_data_points,midlat_deg,inclination_deg,n_spots,beta_1,alpha_1,lon_deg_1,lat_deg_1,beta_2,alpha_2,lon_deg_2,lat_deg_2,beta_3,alpha_3,lon_deg_3,lat_deg_3,starid/d;}' -i"
# -------------------------- TRAINING SET ----------------------------------
# this is where the flare tables go
path = f"results/{today}_flares_train.csv"
# generate script for parallel run
# number of light curves per core
n_lcs_per_batch = n_lcs // batches
command = f"python 09_generate_training_data.py {today} {n_lcs_per_batch} {path} train\n"
with open(SCIPT_NAME_GENERATE_DATA, "w") as f:
for i in range(batches):
f.write(command)
# remove headers that got lost inside the dataframe
cleanup = f"{clean_header} {path}\n"
f.write(cleanup)
with open(LOG_DATA_OVERVIEW_PATH, "a") as f:
line = (f"{today},train,{path},{inputs},{n_lcs}\n")
f.write(line)
# -------------------------- TRAINING SET END ------------------------------
# -------------------------- VALIDATION SET --------------------------------
# validation set shall be 10% of the size of the training set
factor_smaller = 10
# this is where the flare tables go
path = f"results/{today}_flares_validate.csv"
# generate script for parallel run
# number of light curves per core
n_lcs_per_batch = n_lcs // batches // factor_smaller
command = f"python 09_generate_training_data.py {today} {n_lcs_per_batch} {path} validate\n"
with open(SCIPT_NAME_GENERATE_DATA, "a") as f:
for i in range(batches):
f.write(command)
# remove headers that got lost inside the dataframe
cleanup = f"{clean_header} {path}\n"
f.write(cleanup)
with open(LOG_DATA_OVERVIEW_PATH, "a") as f:
line = (f"{today},validate,{path},{inputs},{n_lcs // factor_smaller}\n")
f.write(line)
# -------------------------- VALIDATION SET END ----------------------------