# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Fudge final values to ensure that all data at a single point
# consistently increases as duration and return interval increase.
#
# Wherever the value for a duration is lower than the value for the
# duration below it (e.g., the pf estimate for 4 days is less than the
# estimate for 3 days), we set the offending value to 1.01 times the
# shorter duration's value, thus ensuring that values consistently increase.
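#
# For example, if the estimates at a point were 1.20 for 3 days and 1.15
# for 4 days, the 4-day value would be reset to 1.20 * 1.01 = 1.212.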
#
# We alternate between iterating over durations and iterating over
# intervals, and repeat the process until no more values need to be
# adjusted.
#
# By the way, if you're reading this and thinking "hmm, that doesn't seem
# statistically sound", you're right. But it's what NOAA did with their
# data so we get to do it too!
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
import os
from glob import glob
import argparse
import xarray as xr
import numpy as np
path = None
out_path = None
data_group = None
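# NOTE: DURATIONS must remain ordered from shortest to longest; the fudging
# below compares each duration against its immediate predecessor in this list.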
DURATIONS = ['60m', '2h', '3h', '6h', '12h', '24h', '2d', '3d', '4d', '7d', '10d', '20d', '30d', '45d', '60d']
visited_durations = [False for _ in DURATIONS]
# Iterate over all of the durations and fudge values to ensure they consistently
# increase as duration increases. Returns the number of values that had to be
# changed
def iterate_durations():
    print(" Iterating over durations...", flush=True)
    values_changed = 0
    # Iterate over pairs of durations (higher and lower)
    for (i, higher_dur) in enumerate(DURATIONS[1:], start=1):
        lower_dur = DURATIONS[i-1]
        print(f" {lower_dur} -> {higher_dur}", flush=True)
        # Naturally, we want to load files from the input directory and save
        # them into the output directory, but since each file is likely
        # to be loaded/saved multiple times as we change values, we need to be
        # sure to only load from the input directory the first time and from the
        # output directory all other times (so that changes to values persist
        # as we do multiple iterations). So we track which durations we've
        # "visited" before and choose the directory to load from accordingly.
        if visited_durations[i]:
            src_dir = out_path
        else:
            src_dir = path
        visited_durations[i] = True
        visited_durations[i-1] = True
        higher_files = glob(os.path.join(src_dir, f"*_{data_group}_sum_wrf_{higher_dur}_*.nc"))
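        # (The glob pattern reflects the naming convention from earlier pipeline
        # steps; e.g. with data_group 'NCAR-CCSM4' and higher_dur '2h' it would
        # match files like "*_NCAR-CCSM4_sum_wrf_2h_*.nc".)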
        for higher_file in higher_files:
            # Durations are stored across files, so we need to derive the
            # corresponding file for the lower duration
            lower_file = higher_file.replace(higher_dur, lower_dur)
            # Load data
            higher_ds = xr.load_dataset(higher_file)
            lower_ds = xr.load_dataset(lower_file)
            higher_arr = higher_ds['pf'].values
            lower_arr = lower_ds['pf'].values
            # mask marks the indices where the higher duration's value is
            # less than or equal to the lower duration's value
            diff = higher_arr - lower_arr
            mask = diff <= 0
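            # (note the "<=": ties count as violations too, so fudged values
            # strictly increase rather than merely not decrease)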
            num = np.count_nonzero(mask)
            # Adjust values
            if num > 0:
                print(f" {num} value(s) changed", flush=True)
                values_changed += num
                higher_arr[mask] = lower_arr[mask] * 1.01
                higher_ds['pf'][...] = higher_arr
            # Save results
            higher_file_out = os.path.join(
                out_path,
                os.path.basename(higher_file).replace('_multiply.nc', '_fudge.nc')
            )
            higher_ds.to_netcdf(higher_file_out)
            # The lower duration doesn't get changed, but we need to ensure that
            # the lowest duration (60m) is still in the output directory, so
            # we simply save it back out on the first pass.
            if i == 1:
                lower_file_out = os.path.join(
                    out_path,
                    os.path.basename(lower_file).replace('_multiply.nc', '_fudge.nc')
                )
                lower_ds.to_netcdf(lower_file_out)
            higher_ds.close()
            lower_ds.close()
    return values_changed

# Iterate over all of the intervals and fudge values to ensure they consistently
# increase as interval increases. Returns the number of values that had to be
# changed
def iterate_intervals():
    print(" Iterating over intervals...", flush=True)
    values_changed = 0
    # Since intervals are stored within each file, this process is more
    # straightforward than iterating over durations: all the info we need
    # to compare interval values is contained within a single file, so we
    # just have to work on each file in turn.
    # Since the first iteration over intervals occurs after the iteration over
    # durations, we can also just load from the output directory, since the
    # durations step will have moved everything there.
    files = glob(os.path.join(out_path, f"*_{data_group}_*.nc"))
    for file in files:
        print(f" {os.path.basename(file)}", flush=True)
        ds = xr.load_dataset(file)
        for i in range(1, len(ds.interval)):
            higher_arr = ds['pf'][i, ...].values
            lower_arr = ds['pf'][i-1, ...].values
            diff = higher_arr - lower_arr
            mask = diff <= 0
            num = np.count_nonzero(mask)
            if num > 0:
                print(f" {num} value(s) changed", flush=True)
                values_changed += num
                higher_arr[mask] = lower_arr[mask] * 1.01
                ds['pf'][i, ...] = higher_arr
        # Write the (possibly modified) data back out, then release the dataset
        ds.to_netcdf(file)
        ds.close()
    return values_changed

if __name__ == '__main__':
    # parse some args
    parser = argparse.ArgumentParser( description='Fudge final pf values so that they consistently increase with duration and return interval.' )
    parser.add_argument( "-p", "--path", action='store', dest='path', type=str, help="input directory storing the return interval data." )
    parser.add_argument( "-o", "--out_path", action='store', dest='out_path', type=str, help="output directory to write outputs." )
    parser.add_argument( "-d", "--data_group", action='store', dest='data_group', type=str, help="name of the model to use: either 'NCAR-CCSM4' or 'GFDL-CM3'." )
    # parse the args and unpack
    args = parser.parse_args()
    path = args.path
    out_path = args.out_path
    data_group = args.data_group
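    # Fixing values along one dimension can introduce new violations along the
    # other, so alternate duration and interval passes until a full round
    # changes nothing.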
    while True:
        num = 0
        num += iterate_durations()
        num += iterate_intervals()
        if num == 0:
            break
    print(" Done!", flush=True)