Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consistent variable naming #30

Merged
merged 10 commits into from
Aug 21, 2020
Merged
2 changes: 2 additions & 0 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ jobs:

steps:
- uses: actions/checkout@v2
- name: Install udunits2
run: sudo apt-get install libudunits2-dev
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v1
with:
Expand Down
1 change: 1 addition & 0 deletions eurec4a_environment/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .nomenclature import get_field
from ._version import get_versions

__version__ = get_versions()["version"]
Expand Down
168 changes: 168 additions & 0 deletions eurec4a_environment/nomenclature.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
"""
To ensure that all functions in eurec4a-environment can access the variables
required the module uses a common nomenclature for variable names throughout.
To add a new variable to the nomenclature simply a) add a new constant below
setting the assumed field name and b) add (if available) the CF-conventions
standard name mapping in `CF_STANDARD_NAMES` if the variable has a "standard
name" (See http://cfconventions.org/standard-names.html for the list of
"standard names")
"""
import inspect
import xarray as xr
try:
import cfunits
HAS_UDUNITS2 = True
except FileNotFoundError:
import warnings
HAS_UDUNITS2 = False


# TODO: update temperature to be called `ta` once JOANNE dataset is released
# which uses this definition
TEMPERATURE = "T"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
TEMPERATURE = "T"
TEMPERATURE = "ta"

This is just to be consistent with the latest variable names in JOANNE - most variable names are finalised, but some are still being discussed.. :/

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No problem :)

# TODO: update altitude to be called `alt` once JOANNE dataset is released
# which uses this definition
ALTITUDE = "height"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
ALTITUDE = "height"
ALTITUDE = "alt"

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, I'll update these. Should I update the JOANNE dataset in the intake catalog already?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did these get updated?

RELATIVE_HUMIDITY = "rh"
POTENTIAL_TEMPERATURE = "theta"
PRESSURE = "p"

# From CF-conventions: "specific" means per unit mass. Specific humidity is
# the mass fraction of water vapor in (moist) air.
SPECIFIC_HUMIDITY = "q"

# From CF-conventions: Speed is the magnitude of velocity. Wind is defined as
# a two-dimensional (horizontal) air velocity vector, with no vertical
# component. (Vertical motion in the atmosphere has the standard name
# upward_air_velocity.) The wind speed is the magnitude of the wind velocity
WIND_SPEED = "wspd"

# From CF-conventions: "Eastward" indicates a vector component which is
# positive when directed eastward (negative westward). Wind is defined as a
# two-dimensional (horizontal) air velocity vector, with no vertical component.
# (Vertical motion in the atmosphere has the standard name
# upward_air_velocity.)
ZONAL_WIND = "u"

# From CF-conventions: "Northward" indicates a vector component which is
# positive when directed northward (negative southward). Wind is defined as a
# two-dimensional (horizontal) air velocity vector, with no vertical component.
# (Vertical motion in the atmosphere has the standard name
# upward_air_velocity.)
MERIDIONAL_WIND = "v"


CF_STANDARD_NAMES = {
TEMPERATURE: "air_temperature",
ALTITUDE: "geopotential_height",
RELATIVE_HUMIDITY: "relative_humidity",
POTENTIAL_TEMPERATURE: "air_potential_temperature",
PRESSURE: "air_pressure",
SPECIFIC_HUMIDITY: "specific_humidity",
WIND_SPEED: "wind_speed",
ZONAL_WIND: "eastward_wind",
MERIDIONAL_WIND: "northward_wind",
}


def _get_calling_function_name():
curframe = inspect.currentframe()
calframe = inspect.getouterframes(curframe, 2)
return calframe[1][3]


class FieldMissingException(Exception):
pass


def get_field(ds, field_name, units=None):
"""
Get field described by `field_name` in dataset `ds` and ensure it is in the
units defined by `units` (if `units` != None). The units definition is any
unit definition supported by
[UDUNITS](https://www.unidata.ucar.edu/software/udunits/)
"""
if field_name in ds:
da = ds[field_name]
else:

calling_function_name = _get_calling_function_name()

if field_name not in CF_STANDARD_NAMES:
raise FieldMissingException(
f"Couldn't find the variable `{field_name}` in the provided dataset."
f" To use {calling_function_name} you need to provide a variable"
f" with the name `{field_name}`"
)
else:
standard_name = CF_STANDARD_NAMES[field_name]
matching_dataarrays = {}
vars_and_coords = list(ds.data_vars) + list(ds.coords)
for v in vars_and_coords:
print(v)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we want the print statement

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, thanks :)

if ds[v].attrs.get("standard_name", None) == standard_name:
matching_dataarrays[v] = ds[v]

if len(matching_dataarrays) == 0:
raise FieldMissingException(
f"Couldn't find the variable `{field_name}` in the provided dataset."
f" To use {calling_function_name} you need to provide a variable"
f" with the name `{field_name}` or set the `standard_name`"
f" attribute to `{standard_name}` for one or more of the variables"
" in the dataset"
)
elif len(matching_dataarrays) == 1:
da = list(matching_dataarrays.values())[0]
else:
dims = [da.dims for da in matching_dataarrays.values()]
if len(set(dims)) == 1:
# all variables have the same dims, so we can create a
# composite data-array out of these
var_names = list(matching_dataarrays.keys())
var_dataarrays = list(matching_dataarrays.values())

# TODO: should we check units here too?
da_combined = xr.concat(var_dataarrays, dim="var_name")
da_combined.coords["var_name"] = var_names
da = da_combined
else:
raise FieldMissingException(
"More than one variable was found in the dataset with"
f" the standard name `{standard_name}`, but these couldn't"
" be merged to a single xarray.DataArray because they"
" don't exist in the same coordinate system"
)
pass

if units is None:
return da
else:
# ensure that output is in correct units
if "units" not in da.attrs:
field_name = da.name
raise Exception(
f"Units haven't been set on `{field_name}` field in dataset"
)
if da.attrs["units"] == units:
return da

if not HAS_UDUNITS2:
raise Exception(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be better to have a more specific exception here

"To do correct unit conversion udunits2 is required, without"
" it no unit conversion will be done. udunits2 can be installed"
" with conda, `conda install -c conda-forge udunits2` or see"
" https://stackoverflow.com/a/42387825 for general instructions"
)

old_units = cfunits.Units(da.attrs["units"])
new_units = cfunits.Units(units)
if old_units == new_units:
return da
else:
values_converted = cfunits.Units.conform(da.values, old_units, new_units)
attrs = dict(da.attrs)
attrs["units"] = units
da_converted = xr.DataArray(
values_converted, coords=da.coords, dims=da.dims, attrs=attrs
)
return da_converted
31 changes: 20 additions & 11 deletions eurec4a_environment/variables/boundary_layer/inversion_height.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,35 @@
import xarray as xr

from ... import nomenclature as nom


def find_inversion_height_grad_RH(
ds, altitude="alt", rh="rh", smoothing_win_size=None, z_min=1500, z_max=4000.0
ds,
altitude=nom.ALTITUDE,
rh=nom.RELATIVE_HUMIDITY,
smoothing_win_size=None,
z_min=1500,
z_max=4000.0,
):

"""
Returns inversion height defined as the maximum in the vertical gradient of RH
"""
"""

ds_lowertroposhere = ds.sel({altitude: slice(z_min, z_max)})
da_rh = nom.get_field(ds=ds_lowertroposhere, field_name=rh)
da_z = nom.get_field(ds=ds_lowertroposhere, field_name=altitude)

if smoothing_win_size:
RH = (
ds_lowertroposhere[rh]
.rolling(alt=smoothing_win_size, min_periods=smoothing_win_size, center=True)
.mean(skipna=True)
)
RH = da_rh.rolling(
alt=smoothing_win_size, min_periods=smoothing_win_size, center=True
).mean(skipna=True)
else:
RH = ds_lowertroposhere[rh]
RH = da_rh

RHg = RH.differentiate(coord=altitude)
ix = RHg.argmin(dim=altitude, skipna=True)
da_z = RHg.isel({ altitude: ix }).alt
da_z = RHg.isel({altitude: ix})[altitude]
da_z.attrs["long_name"] = "inversion layer height (from RH gradient)"
da_z.attrs["units"] = "m"
da_z.attrs["units"] = da_z.units

return da_z
22 changes: 7 additions & 15 deletions eurec4a_environment/variables/boundary_layer/lcl.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import numpy as np

from ...constants import cp_d, g
from ... import nomenclature as nom


def find_LCL_Bolton(ds, temperature="T", rh="RH", altitude="alt"):
def find_LCL_Bolton(
ds, temperature=nom.TEMPERATURE, rh=nom.RELATIVE_HUMIDITY, altitude=nom.ALTITUDE
):
"""
Calculates distribution of LCL from RH and T at different vertical levels
returns mean LCL from this distribution
Expand All @@ -28,20 +31,9 @@ def _calc_LCL_Bolton(da_temperature, da_rh, da_altitude):
mean_zlcl = np.mean(zlcl)
return mean_zlcl

# Celsius to Kelvin
if ds[temperature].units == "C":
da_temperature = ds[temperature] + 273.15
da_temperature.attrs["units"] = "K"
else:
da_temperature = ds[temperature]
# RH from % to [0,1] value
if ds[rh].max().values > 2:
da_rh = ds[rh] / 100
da_rh.attrs["units"] = "1"
else:
da_rh = ds[rh]

da_altitude = ds[altitude]
da_temperature = nom.get_field(ds=ds, field_name=temperature, units="K")
da_rh = nom.get_field(ds=ds, field_name=rh, units="1")
da_altitude = nom.get_field(ds=ds, field_name=altitude, units="m")

return _calc_LCL_Bolton(
da_temperature=da_temperature, da_rh=da_rh, da_altitude=da_altitude
Expand Down
37 changes: 22 additions & 15 deletions eurec4a_environment/variables/boundary_layer/mixed_layer_height.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,18 @@
import statsmodels.api as sm


def calc_peak_RH(ds, altitude="alt", rh="RH", z_min=200.0, z_max=900.0):
from ... import nomenclature as nom


def calc_peak_RH(
ds, altitude=nom.ALTITUDE, rh=nom.RELATIVE_HUMIDITY, z_min=200.0, z_max=900.0
):
"""
Calculate height at maximum relative humidity values
"""
ds_mixed_layer = ds.sel({altitude: slice(z_min, z_max)})

da_rh = ds_mixed_layer[rh]
da_rh = nom.get_field(ds=ds_mixed_layer, field_name=rh, units="%")
dims = list(ds.dims.keys())
# drop the height coord
del dims[dims.index(altitude)]
Expand All @@ -26,8 +31,8 @@ def calc_peak_RH(ds, altitude="alt", rh="RH", z_min=200.0, z_max=900.0):

def calc_peakRH_linearize(
ds,
altitude="height",
rh="rh",
altitude=nom.ALTITUDE,
rh=nom.RELATIVE_HUMIDITY,
time_dim="sounding",
z_min=200.0,
z_max=1500.0,
Expand All @@ -47,24 +52,26 @@ def calc_peakRH_linearize(
Outputs:
-- da: datarray containing h_peakRH_linfit
"""
h_peakRH_linfit = np.zeros(len(ds[rh]))
da_rh = nom.get_field(ds=ds, field_name=rh, units="%")
da_alt = nom.get_field(ds=ds, field_name=altitude, units="m")

dz = int(ds[altitude].diff(dim=altitude)[1]) # dz=10m
h_peakRH_linfit = np.zeros(len(da_rh))

mixed_layer = np.logical_and(ds[altitude] >= z_min, ds[altitude] <= z_max)
dz = int(da_alt.diff(dim=altitude)[1]) # dz=10m

mixed_layer = np.logical_and(da_alt >= z_min, da_alt <= z_max)
ml_linfit = np.logical_and(
ds[altitude] >= z_min_lin, ds[altitude] <= z_max_lin
da_alt >= z_min_lin, da_alt <= z_max_lin
) # for linearized RH profile

for i in range(len(ds[rh])):

rh_profile = ds[rh].isel({time_dim: i}).interpolate_na(dim=altitude)
for i in range(len(da_rh)):
rh_profile = da_rh.isel({time_dim: i}).interpolate_na(dim=altitude)
X_height = rh_profile[ml_linfit][altitude]
X = sm.add_constant(X_height.values) # add intercept
model = sm.OLS(
rh_profile[ml_linfit].values, X
).fit() # instantiate linear model
linearized_RH = model.predict(sm.add_constant(ds[altitude]))
linearized_RH = model.predict(sm.add_constant(da_alt))

# 'find_peaks' is scipy function
idx_peaks_RH_raw, _ = find_peaks(rh_profile[mixed_layer])
Expand Down Expand Up @@ -94,7 +101,7 @@ def calc_peakRH_linearize(


def calc_from_gradient(
ds, var, threshold, z_min=200, altitude="height", time="sounding"
ds, var, threshold, z_min=200, altitude=nom.ALTITUDE, time="sounding"
):
"""
Find mixed layer height as layer over which x(z+1) - x(z) < threshold
Expand All @@ -116,7 +123,7 @@ def calculateHmix_var(
var_profile,
threshold,
z_min,
altitude="height",
altitude=nom.ALTITUDE,
time="sounding",
):

Expand Down Expand Up @@ -156,7 +163,7 @@ def calculateHmix_var(
var_profile,
threshold,
z_min,
altitude="height",
altitude=altitude,
time="sounding",
)

Expand Down
Loading