Merge pull request #23 from NCAR/develop
Develop
rrbuchholz authored Jan 26, 2023
2 parents 9be1cdb + 6401036 commit 258a24e
Showing 7 changed files with 158 additions and 38 deletions.
10 changes: 9 additions & 1 deletion .github/workflows/ci.yml
@@ -33,13 +33,18 @@ jobs:
python=${{ matrix.python-version }}
- name: Test with pytest
run: pytest -n auto -v
run: pytest -n auto -v -k 'not aqs'

- name: Test with pytspack installed
run: |
pip install https://github.com/noaa-oar-arl/pytspack/archive/master.zip
pytest -n auto -v -k with_pytspack
- name: Downgrade OpenSSL and test AQS
run: |
micromamba install 'openssl <3'
pytest -n auto -v -k aqs
docs:
name: Check docs build
runs-on: ubuntu-latest
@@ -57,6 +62,9 @@ jobs:
environment-file: docs/environment-docs.yml
cache-env: true

- name: Downgrade OpenSSL (for AQS URL linkcheck)
run: micromamba install 'openssl <3'

- name: linkcheck
run: sphinx-build -b linkcheck docs docs/_build/linkcheck

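The `openssl <3` pin suggests the EPA AQS server still negotiates TLS in a way OpenSSL 3 rejects by default, so the AQS tests are split out and run against the downgraded library. A quick way to check which OpenSSL a Python environment links against (a sketch, not part of this commit):

```python
import ssl

# Reports the OpenSSL build this Python links against; after the
# downgrade step above it should show a 1.x version rather than 3.x.
print(ssl.OPENSSL_VERSION)
```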
5 changes: 5 additions & 0 deletions docs/conf.py
@@ -56,6 +56,11 @@
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"

linkcheck_ignore = [
"https://doi.org/10.1080/10473289.2005.10464718",
"https://www.camx.com",
]

# -- Extension configuration -------------------------------------------------

extlinks = {
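Sphinx treats each `linkcheck_ignore` entry as a regular expression matched against the URL, so the unescaped dots above match any character; a stricter variant with the same effect (a sketch):

```python
# Raw strings with escaped dots avoid accidentally skipping similar URLs.
linkcheck_ignore = [
    r"https://doi\.org/10\.1080/10473289\.2005\.10464718",
    r"https://www\.camx\.com",
]
```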
26 changes: 13 additions & 13 deletions docs/tutorial/improve_trends_kmeans.rst
@@ -67,7 +67,7 @@ Now we will load the data (in this case the file is
self._setitem_with_indexer(indexer, value)
Lets look at the dataframe
Let's look at the dataframe.

.. code-block:: python
@@ -438,8 +438,8 @@ Lets look at the dataframe



Now this is in the long pandas format. Lets use the
monet.util.tools.long_to_wide utility to reformat the dataframe into a
Now this is in the long pandas format. Let's use the
``monet.util.tools.long_to_wide`` utility to reformat the dataframe into a
wide format.

.. code-block:: python
@@ -622,7 +622,7 @@ wide format.



Lets now plot some of the different measurements with time from a site.
Let's now plot some of the different measurements with time from a site.
In this case we will look at the PHOE1 site in Phoenix, Arizona.

.. code-block:: python
@@ -828,7 +828,7 @@ Let’s look at SIf as an example from ACAD1.
.. image:: improve_trends_kmeans_files/improve_trends_kmeans_11_1.png


Now this is good but lets resample to see if we can see a trend.
Now this is good, but let's resample to see if we can see a trend.

.. code-block:: python
@@ -840,10 +840,10 @@ Now this is good but lets resample to see if we can see a trend.
.. image:: improve_trends_kmeans_files/improve_trends_kmeans_13_0.png


Simply resampling is fine but lets try to get a signal out using a
kolmogorov-zerbenko filter. See
https://www.tandfonline.com/doi/pdf/10.1080/10473289.2005.10464718 for
more information
Simply resampling is fine, but let's try to get a signal out using a
Kolmogorov--Zurbenko filter. See
https://doi.org/10.1080/10473289.2005.10464718 for
more information.

.. code-block:: python
@@ -878,7 +878,7 @@ some tools from sklearn to use in our analysis.

.. code-block:: python
from sklearn.preprocessing import RobustScaler #to scale our data
from sklearn.preprocessing import RobustScaler # to scale our data
from sklearn.cluster import KMeans # clustering algorithm
First we want to separate out different variables that may be useful
@@ -968,7 +968,7 @@ NaN values so let us go ahead and do that.


Usually, with sklearn it is better to scale the data first before
putting it through the algorithm. We will use th RobustScaler to do
putting it through the algorithm. We will use the RobustScaler to do
this.

.. code-block:: python
@@ -983,14 +983,14 @@ analysis.
km = KMeans(n_clusters=2).fit(X_scaled)
The clusters can be found under km.labels\_ . These are integers
The clusters can be found under ``km.labels_``. These are integers
representing the different clusters.

.. code-block:: python
clusters = km.labels_
Lets plot this so that we can see where there is dust.
Let's plot this so that we can see where there is dust.

.. code-block:: python
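The two techniques this tutorial leans on are compact enough to sketch here. The Kolmogorov-Zurbenko filter is a centered moving average applied repeatedly, and the clustering step is robust scaling followed by KMeans; the data below are toy stand-ins for the tutorial's IMPROVE columns, not the real dataset:

```python
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import RobustScaler


def kz_filter(ser: pd.Series, window: int, iterations: int) -> pd.Series:
    """Kolmogorov-Zurbenko filter: a centered moving average applied repeatedly."""
    for _ in range(iterations):
        ser = ser.rolling(window, center=True, min_periods=1).mean()
    return ser


# Toy series standing in for e.g. the SIf measurements at a site:
rng = np.random.default_rng(0)
ser = pd.Series(rng.normal(size=365)).cumsum()
smoothed = kz_filter(ser, window=15, iterations=5)

# Scale with median/IQR (resistant to outliers), then cluster into two groups:
X = np.column_stack([smoothed, ser - smoothed])
clusters = KMeans(n_clusters=2, n_init=10).fit(RobustScaler().fit_transform(X)).labels_
```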
2 changes: 1 addition & 1 deletion monetio/models/_wrfchem_mm.py
@@ -189,7 +189,7 @@ def open_mfdataset(
if "pm25_om" in list_calc_sum:
dset = add_lazy_om_pm25(dset, dict_sum)

dset = dset.reset_index(["XTIME", "datetime"], drop=True)
dset = dset.reset_coords(["XTIME", "datetime"], drop=True)
if not surf_only_nc:
# Reset more variables
dset = dset.rename(
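The one-line fix swaps an index operation for a coordinate operation: XTIME and datetime are non-index coordinate variables, which `reset_coords(..., drop=True)` removes, while `reset_index` only applies to indexed coordinates. A minimal sketch of the behavior on a toy dataset (not WRF output):

```python
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {"t2": (("time", "y"), np.zeros((2, 3)))},
    coords={"XTIME": ("time", [0.0, 60.0])},  # non-index coordinate on 'time'
)
ds = ds.reset_coords(["XTIME"], drop=True)  # drops the coordinate variable
assert "XTIME" not in ds.coords
```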
121 changes: 103 additions & 18 deletions monetio/models/hysplit.py
@@ -26,7 +26,8 @@
Change log
2021 13 May AMC get_latlongrid needed to be updated to match makegrid method.
2022 14 Nov AMC initialized self.dset in __init__() in ModelBin class
2022 14 Nov AMC modified fix_grid_continuity to not fail if passed empty Dataset.
"""
import datetime
@@ -183,6 +184,7 @@ def __init__(
self.dlat = None
self.dlon = None
self.levels = None
self.dset = xr.Dataset()

if readwrite == "r":
if verbose:
@@ -463,6 +465,8 @@ def makegrid(self, xindx, yindx):
slon = self.llcrnr_lon
lat = np.arange(slat, slat + self.nlat * self.dlat, self.dlat)
lon = np.arange(slon, slon + self.nlon * self.dlon, self.dlon)
# hysplit always uses grid from -180 to 180
lon = np.array([x - 360 if x > 180 else x for x in lon])
# Fortran array indices start at 1, so xindx >= 1.
# Python array indices start at 0.
lonlist = [lon[x - 1] for x in xindx]
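The list comprehension added above wraps longitudes into HYSPLIT's [-180, 180] convention; an equivalent vectorized form (a sketch) is:

```python
import numpy as np

lon = np.arange(160.0, 200.0, 10.0)        # [160., 170., 180., 190.]
lon = np.where(lon > 180, lon - 360, lon)  # [160., 170., 180., -170.]
```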
@@ -488,7 +492,7 @@ class and need to be converted to a python
"""
# 8/16/2016 moved species=[] to before while loop. Added print
# statements when verbose.
self.dset = None
# self.dset = xr.Dataset()
# dictionaries which will be turned into the dset attributes.
ahash = {}
fid = open(filename, "rb")
@@ -590,7 +594,7 @@ class and need to be converted to a python
# print("Adding ", "Pollutant", pollutant, "Level", lev)
# if this is the first time through. create dataframe
# for first level and pollutant.
if self.dset is None:
if not self.dset.any():
self.dset = dset
else: # create dataframe for level and pollutant and
# then merge with main dataframe.
@@ -614,8 +618,7 @@ class and need to be converted to a python
self.atthash["Species ID"] = list(set(self.atthash["Species ID"]))
self.atthash["Coordinate time description"] = "Beginning of sampling time"
# END OF Loop to go through each sampling time
if self.dset is None:
print("DSET is NONE")
if not self.dset.any():
return False
if self.dset.variables:
self.dset.attrs = self.atthash
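Initializing `self.dset` to an empty xarray Dataset lets the old `is None` checks become `not self.dset.any()` checks: a Dataset with no data variables is falsy, so the condition reads as "nothing has been parsed yet". A sketch of the truthiness involved:

```python
import xarray as xr

empty = xr.Dataset()
print(not empty.any())  # True: reducing an empty Dataset yields no variables

full = xr.Dataset({"c": ("x", [0.0, 0.0])})
print(not full.any())   # False: any Dataset holding variables is truthy
```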
@@ -663,7 +666,7 @@ def combine_dataset(
Files need to have the same concentration grid defined.
"""
iii = 0
# iii = 0
mlat_p = mlon_p = None
ylist = []
dtlist = []
@@ -686,7 +689,7 @@
xlist = []
sourcelist = []
enslist = []
for key in blist:
for iii, key in enumerate(blist):
# fname = val[0]
xsublist = []
for fname in blist[key]:
@@ -736,33 +739,33 @@
else:
aaa, xnew = xr.align(xrash, xnew, join="outer")
xnew = xnew.fillna(0)
iii += 1
# iii += 1
sourcelist.append(key)
xlist.append(xsublist)
# if verbose:
# print("aligned --------------------------------------")
# xnew now encompasses the area of all the data arrays
# now go through and expand each one to the size of xnew.
iii = 0
jjj = 0
# iii = 0
# jjj = 0
ylist = []
slist = []
for sublist in xlist:
for jjj, sublist in enumerate(xlist):
hlist = []
for temp in sublist:
for iii, temp in enumerate(sublist):
# expand to same region as xnew
aaa, bbb = xr.align(temp, xnew, join="outer")
aaa = aaa.fillna(0)
bbb = bbb.fillna(0)
aaa.expand_dims("ens")
aaa["ens"] = enslist[iii]
iii += 1
# iii += 1
hlist.append(aaa)
# concat along the 'ens' axis
new = xr.concat(hlist, "ens")
ylist.append(new)
slist.append(sourcelist[jjj])
jjj += 1
# jjj += 1
if dtlist:
dtlist = list(set(dtlist))
dt = dtlist[0]
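The counter cleanup in this hunk replaces manual iii/jjj increments with enumerate, which yields (index, item) pairs directly; a sketch of the pattern:

```python
blist = {"src_a": ["f1", "f2"], "src_b": ["f3"]}  # hypothetical stand-in
for iii, key in enumerate(blist):
    print(iii, key)  # 0 src_a / 1 src_b, with no manual iii += 1
```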
@@ -811,6 +814,10 @@ def reset_latlon_coords(hxr):


def fix_grid_continuity(dset):
# if dset is empty don't do anything
if not dset.any():
return dset

# if grid is already continuous, don't do anything.
if check_grid_continuity(dset):
return dset
@@ -884,8 +891,13 @@ def get_latlongrid(dset, xindx, yindx):

lat = np.arange(llcrnr_lat, llcrnr_lat + nlat * dlat, dlat)
lon = np.arange(llcrnr_lon, llcrnr_lon + nlon * dlon, dlon)
lonlist = [lon[x - 1] for x in xindx]
latlist = [lat[x - 1] for x in yindx]
lon = np.array([x - 360 if x > 180 else x for x in lon])
try:
lonlist = [lon[x - 1] for x in xindx]
latlist = [lat[x - 1] for x in yindx]
except Exception as eee:
print(f"Exception {eee}")
print("try increasing Number Lat Points or Number Lon Points")
mgrid = np.meshgrid(lonlist, latlist)
return mgrid

@@ -916,6 +928,7 @@ def getlatlon(dset):
dlon = dset.attrs["Longitude Spacing"]
lat = np.arange(llcrnr_lat, llcrnr_lat + nlat * dlat, dlat)
lon = np.arange(llcrnr_lon, llcrnr_lon + nlon * dlon, dlon)
lon = np.array([x - 360 if x > 180 else x for x in lon])
return lat, lon


@@ -936,7 +949,10 @@ def hysp_massload(dset, threshold=0, mult=1, zvals=None):
aml_alts = calc_aml(dset)
# Then choose which levels to use for total mass loading.
if zvals:
aml_alts = aml_alts.sel(z=zvals)
aml_alts = aml_alts.isel(z=zvals)
if "z" not in aml_alts.dims:
aml_alts = aml_alts.expand_dims("z")
#
total_aml = aml_alts.sum(dim="z")
# Calculate conversion factors
# unitmass, mass63 = calc_MER(dset)
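Switching from sel to isel changes what zvals means: coordinate labels (level heights in meters) before, integer positions now; the expand_dims guard keeps the following sum over "z" valid when a single level is picked. A sketch of the distinction:

```python
import numpy as np
import xarray as xr

aml = xr.DataArray(
    np.ones((3, 2, 2)),
    dims=("z", "y", "x"),
    coords={"z": [1000.0, 3000.0, 5000.0]},  # hypothetical level heights
)
by_label = aml.sel(z=[1000.0, 3000.0])  # label-based: heights in meters
by_index = aml.isel(z=[0, 1])           # position-based: what zvals now selects
```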
@@ -1054,14 +1070,79 @@ def add_species(dset, species=None):
while ppp < len(tmp):
total_par = total_par + tmp[ppp]
ppp += 1 # End of loop adding all species
total_par = total_par.assign_attrs({"Species ID": sflist})
atthash = dset.attrs
atthash["Species ID"] = sflist
total_par = total_par.assign_attrs(atthash)
return total_par


def calculate_thickness(cdump):
alts = cdump.z.values
thash = {}
aaa = 0
for avalue in alts:
thash[avalue] = avalue - aaa
aaa = avalue
print(f"WARNING: thickness calculated from z values please verify {thash}")
return thash


def get_thickness(cdump):
"""
Input:
cdump : xarray DataArray with 'Level top heights (m)' as an attribute.
Returns:
thash : dictionary
key is the name of the z coordinate and value is the thickness of that layer in meters.
"""
cstr = "Level top heights (m)"
if cstr not in cdump.attrs.keys():
print(f"warning: {cstr} attribute needed to calculate level thicknesses")
print("warning: alternative calcuation from z dimension values")
thash = calculate_thickness(cdump)
else:
levs = cdump.attrs[cstr]
thash = {}
aaa = 0
for level in levs:
thash[level] = level - aaa
aaa = level
return thash


def _delta_multiply(pars):
"""
# Calculates the delta altitude for each layer and
# multiplies concentration by layer thickness to return mass load.
# requires that the 'Level top heights (m)' is an attribute of pars.
# pars: xarray data array
concentration with z coordinate.
# OUTPUT
# newpar : xarray data array
mass loading.
"""
thash = get_thickness(pars)
for iii, zzz in enumerate(pars.z.values):
delta = thash[zzz]
mml = pars.isel(z=iii) * delta
if iii == 0:
newpar = mml
else:
newpar = xr.concat([newpar, mml], "z")
if "z" not in newpar.dims:
newpar = newpar.expand_dims("z")
return newpar


def _delta_multiply_old(pars):
"""
# This method was faulty because layers with no concentrations were
# omitted. E.g. if layers were at 0,1000,2000,3000,4000,5000 but there was
# no mass below 2000, then only layers 3000,4000,5000 would be seen and the
# thickness of the 3000 layer would be calculated as 3000 instead of 1000.
# Calculates the delta altitude for each layer and
# multiplies concentration by layer thickness to return mass load.
# pars: xarray data array
concentration with z coordinate.
@@ -1086,6 +1167,8 @@ def _delta_multiply(pars):
newpar = mml
else:
newpar = xr.concat([newpar, mml], "z")
if "z" not in newpar.dims:
newpar = newpar.expand_dims("z")
yyy += 1 # End of loop calculating heights
return newpar

Expand All @@ -1106,4 +1189,6 @@ def _alt_multiply(pars):
else:
newpar = xr.concat([newpar, mml], "z")
yyy += 1 # End of loop calculating heights
if "z" not in newpar.dims:
newpar = newpar.expand_dims("z")
return newpar
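A worked example of the new thickness logic: given 'Level top heights (m)' of (1000, 3000, 5000), each layer's thickness is its top minus the previous top, which is what the faulty old method got wrong when empty layers were omitted. A sketch of the same calculation:

```python
tops = [1000, 3000, 5000]  # 'Level top heights (m)'
thash, prev = {}, 0
for top in tops:
    thash[top] = top - prev  # layer thickness = top minus previous layer top
    prev = top
assert thash == {1000: 1000, 3000: 2000, 5000: 2000}
```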