Skip to content

Commit

Permalink
Consistent variable naming (with unit conversion) (#30)
Browse files Browse the repository at this point in the history
* add nomenclature with tests
* refactor mixed-layer-height methods to use nomenclature
* add units support to nomenclature (using cfunits)
* Refactor to use nomenclature throughout
  • Loading branch information
leifdenby authored Aug 21, 2020
1 parent c8dd988 commit c789425
Show file tree
Hide file tree
Showing 11 changed files with 376 additions and 75 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ jobs:

steps:
- uses: actions/checkout@v2
- name: Install udunits2
run: sudo apt-get install libudunits2-dev
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v1
with:
Expand Down
1 change: 1 addition & 0 deletions eurec4a_environment/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .nomenclature import get_field
from ._version import get_versions

__version__ = get_versions()["version"]
Expand Down
170 changes: 170 additions & 0 deletions eurec4a_environment/nomenclature.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
"""
To ensure that all functions in eurec4a-environment can access the variables
required the module uses a common nomenclature for variable names throughout.
To add a new variable to the nomenclature simply a) add a new constant below
setting the assumed field name and b) add (if available) the CF-conventions
standard name mapping in `CF_STANDARD_NAMES` if the variable has a "standard
name" (See http://cfconventions.org/standard-names.html for the list of
"standard names")
"""
import inspect
import xarray as xr
try:
import cfunits
HAS_UDUNITS2 = True
except FileNotFoundError:
import warnings
HAS_UDUNITS2 = False


# TODO: update temperature to be called `ta` once JOANNE dataset is released
# which uses this definition
TEMPERATURE = "T"
# TODO: update altitude to be called `alt` once JOANNE dataset is released
# which uses this definition
ALTITUDE = "height"
RELATIVE_HUMIDITY = "rh"
POTENTIAL_TEMPERATURE = "theta"
PRESSURE = "p"

# From CF-conventions: "specific" means per unit mass. Specific humidity is
# the mass fraction of water vapor in (moist) air.
SPECIFIC_HUMIDITY = "q"

# From CF-conventions: Speed is the magnitude of velocity. Wind is defined as
# a two-dimensional (horizontal) air velocity vector, with no vertical
# component. (Vertical motion in the atmosphere has the standard name
# upward_air_velocity.) The wind speed is the magnitude of the wind velocity
WIND_SPEED = "wspd"

# From CF-conventions: "Eastward" indicates a vector component which is
# positive when directed eastward (negative westward). Wind is defined as a
# two-dimensional (horizontal) air velocity vector, with no vertical component.
# (Vertical motion in the atmosphere has the standard name
# upward_air_velocity.)
ZONAL_WIND = "u"

# From CF-conventions: "Northward" indicates a vector component which is
# positive when directed northward (negative southward). Wind is defined as a
# two-dimensional (horizontal) air velocity vector, with no vertical component.
# (Vertical motion in the atmosphere has the standard name
# upward_air_velocity.)
MERIDIONAL_WIND = "v"


CF_STANDARD_NAMES = {
TEMPERATURE: "air_temperature",
ALTITUDE: "geopotential_height",
RELATIVE_HUMIDITY: "relative_humidity",
POTENTIAL_TEMPERATURE: "air_potential_temperature",
PRESSURE: "air_pressure",
SPECIFIC_HUMIDITY: "specific_humidity",
WIND_SPEED: "wind_speed",
ZONAL_WIND: "eastward_wind",
MERIDIONAL_WIND: "northward_wind",
}


def _get_calling_function_name():
curframe = inspect.currentframe()
calframe = inspect.getouterframes(curframe, 2)
return calframe[1][3]


class FieldMissingException(Exception):
pass

class UDUNTS2_MissingException(Exception):
pass


def get_field(ds, field_name, units=None):
"""
Get field described by `field_name` in dataset `ds` and ensure it is in the
units defined by `units` (if `units` != None). The units definition is any
unit definition supported by
[UDUNITS](https://www.unidata.ucar.edu/software/udunits/)
"""
if field_name in ds:
da = ds[field_name]
else:

calling_function_name = _get_calling_function_name()

if field_name not in CF_STANDARD_NAMES:
raise FieldMissingException(
f"Couldn't find the variable `{field_name}` in the provided dataset."
f" To use {calling_function_name} you need to provide a variable"
f" with the name `{field_name}`"
)
else:
standard_name = CF_STANDARD_NAMES[field_name]
matching_dataarrays = {}
vars_and_coords = list(ds.data_vars) + list(ds.coords)
for v in vars_and_coords:
if ds[v].attrs.get("standard_name", None) == standard_name:
matching_dataarrays[v] = ds[v]

if len(matching_dataarrays) == 0:
raise FieldMissingException(
f"Couldn't find the variable `{field_name}` in the provided dataset."
f" To use {calling_function_name} you need to provide a variable"
f" with the name `{field_name}` or set the `standard_name`"
f" attribute to `{standard_name}` for one or more of the variables"
" in the dataset"
)
elif len(matching_dataarrays) == 1:
da = list(matching_dataarrays.values())[0]
else:
dims = [da.dims for da in matching_dataarrays.values()]
if len(set(dims)) == 1:
# all variables have the same dims, so we can create a
# composite data-array out of these
var_names = list(matching_dataarrays.keys())
var_dataarrays = list(matching_dataarrays.values())

# TODO: should we check units here too?
da_combined = xr.concat(var_dataarrays, dim="var_name")
da_combined.coords["var_name"] = var_names
da = da_combined
else:
raise FieldMissingException(
"More than one variable was found in the dataset with"
f" the standard name `{standard_name}`, but these couldn't"
" be merged to a single xarray.DataArray because they"
" don't exist in the same coordinate system"
)
pass

if units is None:
return da
else:
# ensure that output is in correct units
if "units" not in da.attrs:
field_name = da.name
raise Exception(
f"Units haven't been set on `{field_name}` field in dataset"
)
if da.attrs["units"] == units:
return da

if not HAS_UDUNITS2:
raise UDUNTS2_MissingException(
"To do correct unit conversion udunits2 is required, without"
" it no unit conversion will be done. udunits2 can be installed"
" with conda, `conda install -c conda-forge udunits2` or see"
" https://stackoverflow.com/a/42387825 for general instructions"
)

old_units = cfunits.Units(da.attrs["units"])
new_units = cfunits.Units(units)
if old_units == new_units:
return da
else:
values_converted = cfunits.Units.conform(da.values, old_units, new_units)
attrs = dict(da.attrs)
attrs["units"] = units
da_converted = xr.DataArray(
values_converted, coords=da.coords, dims=da.dims, attrs=attrs
)
return da_converted
31 changes: 20 additions & 11 deletions eurec4a_environment/variables/boundary_layer/inversion_height.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,35 @@
import xarray as xr

from ... import nomenclature as nom


def find_inversion_height_grad_RH(
ds, altitude="alt", rh="rh", smoothing_win_size=None, z_min=1500, z_max=4000.0
ds,
altitude=nom.ALTITUDE,
rh=nom.RELATIVE_HUMIDITY,
smoothing_win_size=None,
z_min=1500,
z_max=4000.0,
):

"""
Returns inversion height defined as the maximum in the vertical gradient of RH
"""
"""

ds_lowertroposhere = ds.sel({altitude: slice(z_min, z_max)})
da_rh = nom.get_field(ds=ds_lowertroposhere, field_name=rh)
da_z = nom.get_field(ds=ds_lowertroposhere, field_name=altitude)

if smoothing_win_size:
RH = (
ds_lowertroposhere[rh]
.rolling(alt=smoothing_win_size, min_periods=smoothing_win_size, center=True)
.mean(skipna=True)
)
RH = da_rh.rolling(
alt=smoothing_win_size, min_periods=smoothing_win_size, center=True
).mean(skipna=True)
else:
RH = ds_lowertroposhere[rh]
RH = da_rh

RHg = RH.differentiate(coord=altitude)
ix = RHg.argmin(dim=altitude, skipna=True)
da_z = RHg.isel({ altitude: ix }).alt
da_z = RHg.isel({altitude: ix})[altitude]
da_z.attrs["long_name"] = "inversion layer height (from RH gradient)"
da_z.attrs["units"] = "m"
da_z.attrs["units"] = da_z.units

return da_z
22 changes: 7 additions & 15 deletions eurec4a_environment/variables/boundary_layer/lcl.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import numpy as np

from ...constants import cp_d, g
from ... import nomenclature as nom


def find_LCL_Bolton(ds, temperature="T", rh="RH", altitude="alt"):
def find_LCL_Bolton(
ds, temperature=nom.TEMPERATURE, rh=nom.RELATIVE_HUMIDITY, altitude=nom.ALTITUDE
):
"""
Calculates distribution of LCL from RH and T at different vertical levels
returns mean LCL from this distribution
Expand All @@ -28,20 +31,9 @@ def _calc_LCL_Bolton(da_temperature, da_rh, da_altitude):
mean_zlcl = np.mean(zlcl)
return mean_zlcl

# Celsius to Kelvin
if ds[temperature].units == "C":
da_temperature = ds[temperature] + 273.15
da_temperature.attrs["units"] = "K"
else:
da_temperature = ds[temperature]
# RH from % to [0,1] value
if ds[rh].max().values > 2:
da_rh = ds[rh] / 100
da_rh.attrs["units"] = "1"
else:
da_rh = ds[rh]

da_altitude = ds[altitude]
da_temperature = nom.get_field(ds=ds, field_name=temperature, units="K")
da_rh = nom.get_field(ds=ds, field_name=rh, units="1")
da_altitude = nom.get_field(ds=ds, field_name=altitude, units="m")

return _calc_LCL_Bolton(
da_temperature=da_temperature, da_rh=da_rh, da_altitude=da_altitude
Expand Down
37 changes: 22 additions & 15 deletions eurec4a_environment/variables/boundary_layer/mixed_layer_height.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,18 @@
import statsmodels.api as sm


def calc_peak_RH(ds, altitude="alt", rh="RH", z_min=200.0, z_max=900.0):
from ... import nomenclature as nom


def calc_peak_RH(
ds, altitude=nom.ALTITUDE, rh=nom.RELATIVE_HUMIDITY, z_min=200.0, z_max=900.0
):
"""
Calculate height at maximum relative humidity values
"""
ds_mixed_layer = ds.sel({altitude: slice(z_min, z_max)})

da_rh = ds_mixed_layer[rh]
da_rh = nom.get_field(ds=ds_mixed_layer, field_name=rh, units="%")
dims = list(ds.dims.keys())
# drop the height coord
del dims[dims.index(altitude)]
Expand All @@ -26,8 +31,8 @@ def calc_peak_RH(ds, altitude="alt", rh="RH", z_min=200.0, z_max=900.0):

def calc_peakRH_linearize(
ds,
altitude="height",
rh="rh",
altitude=nom.ALTITUDE,
rh=nom.RELATIVE_HUMIDITY,
time_dim="sounding",
z_min=200.0,
z_max=1500.0,
Expand All @@ -47,24 +52,26 @@ def calc_peakRH_linearize(
Outputs:
-- da: datarray containing h_peakRH_linfit
"""
h_peakRH_linfit = np.zeros(len(ds[rh]))
da_rh = nom.get_field(ds=ds, field_name=rh, units="%")
da_alt = nom.get_field(ds=ds, field_name=altitude, units="m")

dz = int(ds[altitude].diff(dim=altitude)[1]) # dz=10m
h_peakRH_linfit = np.zeros(len(da_rh))

mixed_layer = np.logical_and(ds[altitude] >= z_min, ds[altitude] <= z_max)
dz = int(da_alt.diff(dim=altitude)[1]) # dz=10m

mixed_layer = np.logical_and(da_alt >= z_min, da_alt <= z_max)
ml_linfit = np.logical_and(
ds[altitude] >= z_min_lin, ds[altitude] <= z_max_lin
da_alt >= z_min_lin, da_alt <= z_max_lin
) # for linearized RH profile

for i in range(len(ds[rh])):

rh_profile = ds[rh].isel({time_dim: i}).interpolate_na(dim=altitude)
for i in range(len(da_rh)):
rh_profile = da_rh.isel({time_dim: i}).interpolate_na(dim=altitude)
X_height = rh_profile[ml_linfit][altitude]
X = sm.add_constant(X_height.values) # add intercept
model = sm.OLS(
rh_profile[ml_linfit].values, X
).fit() # instantiate linear model
linearized_RH = model.predict(sm.add_constant(ds[altitude]))
linearized_RH = model.predict(sm.add_constant(da_alt))

# 'find_peaks' is scipy function
idx_peaks_RH_raw, _ = find_peaks(rh_profile[mixed_layer])
Expand Down Expand Up @@ -94,7 +101,7 @@ def calc_peakRH_linearize(


def calc_from_gradient(
ds, var, threshold, z_min=200, altitude="height", time="sounding"
ds, var, threshold, z_min=200, altitude=nom.ALTITUDE, time="sounding"
):
"""
Find mixed layer height as layer over which x(z+1) - x(z) < threshold
Expand All @@ -116,7 +123,7 @@ def calculateHmix_var(
var_profile,
threshold,
z_min,
altitude="height",
altitude=nom.ALTITUDE,
time="sounding",
):

Expand Down Expand Up @@ -156,7 +163,7 @@ def calculateHmix_var(
var_profile,
threshold,
z_min,
altitude="height",
altitude=altitude,
time="sounding",
)

Expand Down
Loading

0 comments on commit c789425

Please sign in to comment.