Hysplit concentration read speedup #177

Open · wants to merge 6 commits into base: develop
64 changes: 38 additions & 26 deletions monetio/models/hysplit.py
@@ -455,12 +455,13 @@ def parse_hdata8(hdata8a, hdata8b, pdate1):
names = ["y" if x == "jndx" else x for x in names]
names = ["x" if x == "indx" else x for x in names]
names = ["z" if x == "levels" else x for x in names]
names = [col_name if x == "conc" else x for x in names]
concframe.columns = names
concframe.set_index(
["time", "z", "y", "x"],
inplace=True,
)
concframe.rename(columns={"conc": col_name}, inplace=True)
# concframe.set_index(
# ["time", "z", "y", "x"],
# inplace=True,
# )
#concframe.rename(columns={"conc": col_name}, inplace=True)
# mgrid = np.meshgrid(lat, lon)
return concframe

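For context on the hunk above: the concentration column is now renamed while the `names` list is being built, and the per-frame `set_index`/`rename` calls are dropped (the MultiIndex is set once on the concatenated frame in a later hunk). Below is a minimal standalone sketch of that renaming step; `concframe` and `col_name` hold made-up toy values rather than data read from a real cdump file.

```python
import pandas as pd

# Toy stand-in for one level's worth of unpacked records (hypothetical values).
concframe = pd.DataFrame(
    {"time": ["2020-01-01"] * 2, "levels": [100, 100],
     "jndx": [10, 11], "indx": [20, 21], "conc": [1.0e-12, 2.0e-12]}
)
col_name = "TEST"  # species identifier; in the reader it comes from the header records

# Rename everything in one pass, including the concentration column,
# instead of a separate DataFrame.rename() call per frame.
names = list(concframe.columns)
names = ["y" if x == "jndx" else x for x in names]
names = ["x" if x == "indx" else x for x in names]
names = ["z" if x == "levels" else x for x in names]
names = [col_name if x == "conc" else x for x in names]
concframe.columns = names

# Setting the ["time", "z", "y", "x"] MultiIndex is deferred: it happens once
# on the concatenated per-level frames (see the later hunk), not per frame here.
```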
@@ -563,6 +564,7 @@ class and need to be converted to a python
# Safety valve - will not allow more than imax loops to be executed.
imax = 1e8
testf = True
timedslist = []
while testf:
hdata6 = np.fromfile(fid, dtype=rec6, count=1)
hdata7 = np.fromfile(fid, dtype=rec6, count=1)
@@ -574,10 +576,12 @@ class and need to be converted to a python
print("sample time", pdate1, " to ", pdate2)
# datelist = []
inc_iii = False
# LOOP to go through each level
for _ in range(self.atthash["Number of Levels"]):
# LOOP to go through each pollutant
for _ in range(self.atthash["Number of Species"]):
# LOOP to go through each pollutant
poldslist = []
for _ in range(self.atthash["Number of Species"]):
# LOOP to go through each level
concframes = []
for _ in range(self.atthash["Number of Levels"]):
# record 8a has the number of elements (ne). If the number of
# elements is greater than 0 then there are concentrations.
hdata8a = np.fromfile(fid, dtype=rec8a, count=1)
@@ -608,37 +612,45 @@ class and need to be converted to a python
concframe = self.parse_hdata8(hdata8a, hdata8b, pdate2)
else:
concframe = self.parse_hdata8(hdata8a, hdata8b, pdate1)
dset = xr.Dataset.from_dataframe(concframe)
concframes += [concframe]
# if verbose:
# print("Adding ", "Pollutant", pollutant, "Level", lev)
# if this is the first time through, create dataframe
# for the first level and pollutant.
if not self.dset.any():
self.dset = dset
else: # create dataframe for level and pollutant and
# then merge with main dataframe.
# self.dset = xr.concat([self.dset, dset],'levels')
# self.dset = xr.merge([self.dset, dset],compat='override')
self.dset = xr.merge([self.dset, dset])
# self.dset = xr.combine_by_coords([self.dset, dset])
# self.dset = xr.merge([self.dset, dset], compat='override')
iimax += 1
# END LOOP to go through each pollutant
# END LOOP to go through each level
# END LOOP to go through each level
if len(concframes) > 0:
concframes = pd.concat(concframes)
concframes.set_index(
["time", "z", "y", "x"],
inplace=True,
)
dset = xr.Dataset.from_dataframe(concframes)
poldslist += [dset]
else:
poldslist += [None]
# END LOOP to go through each pollutant
# safety check - will stop the sampling time while loop if it goes over
# imax iterations.
if iimax > imax:
testf = False
print("greater than imax", testf, iimax, imax)
if inc_iii:
iii += 1

if len(poldslist) > 0:
timedslist += [poldslist]
# END OF Loop to go through each sampling time
self.atthash.update(self.gridhash)
self.atthash["Species ID"] = list(set(self.atthash["Species ID"]))
self.atthash["Coordinate time description"] = "Beginning of sampling time"
# END OF Loop to go through each sampling time
if not self.dset.any():

Ns = range(self.atthash["Number of Species"])
# For each species, grab all relevant datasets from the time list
dsets = [[ll[n] for ll in timedslist if ll[n] is not None] for n in Ns]
dsets = [xr.concat(ds, dim='time') for ds in dsets if len(ds) > 0]
if len(dsets) == 0:
return False
self.dset = xr.merge(dsets)
# if not self.dset.any():
# return False
if self.dset.variables:
self.dset.attrs = self.atthash
# mgrid = self.makegrid(self.dset.coords["x"], self.dset.coords["y"])
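To summarize the restructuring in the hunks above: per-level frames are accumulated in `concframes`, concatenated and indexed once per pollutant, the resulting per-pollutant datasets are collected per sampling time in `timedslist`, and only at the very end is there one `xr.concat` over time per species followed by a single `xr.merge`, replacing the `xr.merge` call that used to run inside the innermost loop. A minimal sketch of that accumulation pattern with synthetic data follows; `make_level_frame`, `species_ids`, `times`, and `levels` are hypothetical stand-ins, not part of monetio.

```python
import pandas as pd
import xarray as xr


def make_level_frame(species, t, z):
    # Hypothetical helper standing in for parse_hdata8: one level's records.
    return pd.DataFrame(
        {"time": [t, t], "z": [z, z], "y": [10, 11], "x": [20, 21],
         species: [1.0e-12, 2.0e-12]}
    )


species_ids = ["P001", "P002"]        # stand-in for atthash["Species ID"]
times = ["2020-01-01", "2020-01-02"]  # sampling times
levels = [100, 500]                   # output levels

timedslist = []
for t in times:  # LOOP over sampling times
    poldslist = []
    for sp in species_ids:  # LOOP over pollutants
        # LOOP over levels: collect plain DataFrames instead of merging datasets
        concframes = [make_level_frame(sp, t, z) for z in levels]
        if len(concframes) > 0:
            frame = pd.concat(concframes)
            frame.set_index(["time", "z", "y", "x"], inplace=True)
            poldslist.append(xr.Dataset.from_dataframe(frame))
        else:
            poldslist.append(None)
    timedslist.append(poldslist)

# One xr.concat over time per species, then a single xr.merge across species,
# rather than calling xr.merge once per level inside the innermost loop.
per_species = [
    [row[n] for row in timedslist if row[n] is not None]
    for n in range(len(species_ids))
]
dset = xr.merge([xr.concat(ds, dim="time") for ds in per_species if len(ds) > 0])
print(dset)
```

Batching this way avoids repeatedly re-aligning a growing dataset, which is where the speedup in this PR comes from.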