Hysplit concentration read speedup #177

Open · wants to merge 6 commits into base: develop
64 changes: 38 additions & 26 deletions monetio/models/hysplit.py
@@ -455,12 +455,13 @@ def parse_hdata8(hdata8a, hdata8b, pdate1):
names = ["y" if x == "jndx" else x for x in names]
names = ["x" if x == "indx" else x for x in names]
names = ["z" if x == "levels" else x for x in names]
names = [col_name if x == "conc" else x for x in names]
concframe.columns = names
concframe.set_index(
["time", "z", "y", "x"],
inplace=True,
)
concframe.rename(columns={"conc": col_name}, inplace=True)
# concframe.set_index(
# ["time", "z", "y", "x"],
# inplace=True,
# )
#concframe.rename(columns={"conc": col_name}, inplace=True)
# mgrid = np.meshgrid(lat, lon)
return concframe

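For context on the hunk above: the concentration column is now renamed while the `names` list is being built, and the per-frame `set_index`/`rename` calls are dropped (the MultiIndex is set once on the concatenated frame in a later hunk). Below is a minimal standalone sketch of that renaming step; `concframe` and `col_name` hold made-up toy values rather than data read from a real cdump file.

```python
import pandas as pd

# Toy stand-in for one level's worth of unpacked records (hypothetical values).
concframe = pd.DataFrame(
    {"time": ["2020-01-01"] * 2, "levels": [100, 100],
     "jndx": [10, 11], "indx": [20, 21], "conc": [1.0e-12, 2.0e-12]}
)
col_name = "TEST"  # species identifier; in the reader it comes from the header records

# Rename everything in one pass, including the concentration column,
# instead of a separate DataFrame.rename() call per frame.
names = list(concframe.columns)
names = ["y" if x == "jndx" else x for x in names]
names = ["x" if x == "indx" else x for x in names]
names = ["z" if x == "levels" else x for x in names]
names = [col_name if x == "conc" else x for x in names]
concframe.columns = names

# Setting the ["time", "z", "y", "x"] MultiIndex is deferred: it happens once
# on the concatenated per-level frames (see the later hunk), not per frame here.
```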
@@ -563,6 +564,7 @@ class and need to be converted to a python
# Safety valve - will not allow more than imax loops to be executed.
imax = 1e8
testf = True
timedslist = []
while testf:
hdata6 = np.fromfile(fid, dtype=rec6, count=1)
hdata7 = np.fromfile(fid, dtype=rec6, count=1)
@@ -574,10 +576,12 @@ class and need to be converted to a python
print("sample time", pdate1, " to ", pdate2)
# datelist = []
inc_iii = False
# LOOP to go through each level
for _ in range(self.atthash["Number of Levels"]):
# LOOP to go through each pollutant
for _ in range(self.atthash["Number of Species"]):
# LOOP to go through each pollutant
poldslist = []
for _ in range(self.atthash["Number of Species"]):
# LOOP to go through each level
concframes = []
for _ in range(self.atthash["Number of Levels"]):
# record 8a has the number of elements (ne). If the number of
# elements is greater than 0 then there are concentrations.
hdata8a = np.fromfile(fid, dtype=rec8a, count=1)
@@ -608,37 +612,45 @@ class and need to be converted to a python
concframe = self.parse_hdata8(hdata8a, hdata8b, pdate2)
else:
concframe = self.parse_hdata8(hdata8a, hdata8b, pdate1)
dset = xr.Dataset.from_dataframe(concframe)
concframes += [concframe]
# if verbose:
# print("Adding ", "Pollutant", pollutant, "Level", lev)
# if this is the first time through, create dataframe
# for the first level and pollutant.
if not self.dset.any():
self.dset = dset
else: # create dataframe for level and pollutant and
# then merge with main dataframe.
# self.dset = xr.concat([self.dset, dset],'levels')
# self.dset = xr.merge([self.dset, dset],compat='override')
self.dset = xr.merge([self.dset, dset])
# self.dset = xr.combine_by_coords([self.dset, dset])
# self.dset = xr.merge([self.dset, dset], compat='override')
iimax += 1
# END LOOP to go through each pollutant
# END LOOP to go through each level
# END LOOP to go through each level
if len(concframes) > 0:
concframes = pd.concat(concframes)
concframes.set_index(
["time", "z", "y", "x"],
inplace=True,
)
dset = xr.Dataset.from_dataframe(concframes)
poldslist += [dset]
else:
poldslist += [None]
# END LOOP to go through each pollutant
# safety check - will stop the sampling time while loop if it goes over
# imax iterations.
if iimax > imax:
testf = False
print("greater than imax", testf, iimax, imax)
if inc_iii:
iii += 1

if len(poldslist) > 0:
timedslist += [poldslist]
# END OF Loop to go through each sampling time
self.atthash.update(self.gridhash)
self.atthash["Species ID"] = list(set(self.atthash["Species ID"]))
self.atthash["Coordinate time description"] = "Beginning of sampling time"
# END OF Loop to go through each sampling time
if not self.dset.any():

Ns = range(self.atthash["Number of Species"])
# For each species, grab all relevant datasets from the time list
dsets = [[ll[n] for ll in timedslist if ll[n] is not None] for n in Ns]
dsets = [xr.concat(ds, dim='time') for ds in dsets if len(ds) > 0]
if len(dsets) == 0:
return False
self.dset = xr.merge(dsets)
# if not self.dset.any():
# return False
if self.dset.variables:
self.dset.attrs = self.atthash
# mgrid = self.makegrid(self.dset.coords["x"], self.dset.coords["y"])
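To summarize the restructuring in the hunks above: per-level frames are accumulated in `concframes`, concatenated and indexed once per pollutant, the resulting per-pollutant datasets are collected per sampling time in `timedslist`, and only at the very end is there one `xr.concat` over time per species followed by a single `xr.merge`, replacing the `xr.merge` call that used to run inside the innermost loop. A minimal sketch of that accumulation pattern with synthetic data follows; `make_level_frame`, `species_ids`, `times`, and `levels` are hypothetical stand-ins, not part of monetio.

```python
import pandas as pd
import xarray as xr


def make_level_frame(species, t, z):
    # Hypothetical helper standing in for parse_hdata8: one level's records.
    return pd.DataFrame(
        {"time": [t, t], "z": [z, z], "y": [10, 11], "x": [20, 21],
         species: [1.0e-12, 2.0e-12]}
    )


species_ids = ["P001", "P002"]        # stand-in for atthash["Species ID"]
times = ["2020-01-01", "2020-01-02"]  # sampling times
levels = [100, 500]                   # output levels

timedslist = []
for t in times:  # LOOP over sampling times
    poldslist = []
    for sp in species_ids:  # LOOP over pollutants
        # LOOP over levels: collect plain DataFrames instead of merging datasets
        concframes = [make_level_frame(sp, t, z) for z in levels]
        if len(concframes) > 0:
            frame = pd.concat(concframes)
            frame.set_index(["time", "z", "y", "x"], inplace=True)
            poldslist.append(xr.Dataset.from_dataframe(frame))
        else:
            poldslist.append(None)
    timedslist.append(poldslist)

# One xr.concat over time per species, then a single xr.merge across species,
# rather than calling xr.merge once per level inside the innermost loop.
per_species = [
    [row[n] for row in timedslist if row[n] is not None]
    for n in range(len(species_ids))
]
dset = xr.merge([xr.concat(ds, dim="time") for ds in per_species if len(ds) > 0])
print(dset)
```

Batching this way avoids repeatedly re-aligning a growing dataset, which is where the speedup in this PR comes from.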