Skip to content

Commit

Permalink
added add_default_fillvals function that is not used per default (#496)
Browse files Browse the repository at this point in the history
* added decode_default_fillvals function that is not used per default

* used decode_default_fillvals also in reconstruct_zw_zcc_fromzsigma

* updated whatsnew

* updated comments and made remove_unassociated_edges optional

* added testcase
  • Loading branch information
veenstrajelmer authored Aug 3, 2023
1 parent 72bb148 commit d3bcc0b
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 13 deletions.
15 changes: 6 additions & 9 deletions dfm_tools/get_nc.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@
import xarray as xr
import matplotlib.pyplot as plt
from dfm_tools.xarray_helpers import Dataset_varswithdim
from dfm_tools.xugrid_helpers import get_vertical_dimensions
import netCDF4
from dfm_tools.xugrid_helpers import get_vertical_dimensions, decode_default_fillvals


def calc_dist_pythagoras(x1,x2,y1,y2):
Expand Down Expand Up @@ -276,9 +275,11 @@ def reconstruct_zw_zcc_fromzsigma(uds):
reconstruct full grid output (time/face-varying z-values) for zsigmavalue model without full grid output. Implemented in https://issuetracker.deltares.nl/browse/UNST-5477
based on https://cfconventions.org/cf-conventions/cf-conventions.html#_ocean_sigma_over_z_coordinate
"""
#TODO: center values are clipped to bedlevel, so the center values of the bottom layer are currently incorrect
#TODO: default fillvalues are not automatically parsed to nan, so doing it manually: https://github.com/pydata/xarray/issues/2742
fillvals = netCDF4.default_fillvals
# TODO: center values are clipped to bedlevel, so the center values of the bottom layer are currently incorrect

# temporarily decode default fillvalues
# TODO: xarray only decodes explicitly set fillvalues: https://github.com/Deltares/dfm_tools/issues/490
uds = xu.UgridDataset(decode_default_fillvals(uds.obj),grids=[uds.grid])

osz_formulaterms_int_dict = get_formula_terms(uds,varn_contains='interface')
osz_formulaterms_lay_dict = get_formula_terms(uds,varn_contains='layer')
Expand All @@ -287,13 +288,9 @@ def reconstruct_zw_zcc_fromzsigma(uds):
uds_depth = uds[osz_formulaterms_int_dict['depth']] #mesh2d_bldepth: positive version of mesh2d_flowelem_bl, but this one is always in file
uds_depth_c = uds[osz_formulaterms_int_dict['depth_c']] #mesh2d_sigmazdepth
uds_zlev_int = uds[osz_formulaterms_int_dict['zlev']] #mesh2d_interface_z
uds_zlev_int = uds_zlev_int.where(uds_zlev_int!=fillvals['f8'])
uds_sigma_int = uds[osz_formulaterms_int_dict['sigma']] #mesh2d_interface_sigma
uds_sigma_int = uds_sigma_int.where(uds_sigma_int!=fillvals['f8'])
uds_zlev_lay = uds[osz_formulaterms_lay_dict['zlev']] #mesh2d_layer_z
uds_zlev_lay = uds_zlev_lay.where(uds_zlev_lay!=fillvals['f8'])
uds_sigma_lay = uds[osz_formulaterms_lay_dict['sigma']] #mesh2d_layer_sigma
uds_sigma_lay = uds_sigma_lay.where(uds_sigma_lay!=fillvals['f8'])

# for levels k where sigma(k) has a defined value and zlev(k) is not defined:
# z(n,k,j,i) = eta(n,j,i) + sigma(k)*(min(depth_c,depth(j,i))+eta(n,j,i))
Expand Down
38 changes: 34 additions & 4 deletions dfm_tools/xugrid_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,33 @@ def remove_unassociated_edges(ds: xr.Dataset, topology: str = None) -> xr.Datase
return ds


def open_partitioned_dataset(file_nc, remove_ghost=True, **kwargs):
def decode_default_fillvals(ds):
"""
xarray only supports explicitly set _FillValue attrs, and therefore ignores the default netCDF4 fillvalue
This function adds the default fillvalue as _FillValue attribute and decodes the dataset again.
"""
# TODO: this function can be removed when xarray does it automatically: https://github.com/Deltares/dfm_tools/issues/490

from netCDF4 import default_fillvals
nfillattrs_added = 0
for varn in ds.variables:
# TODO: possible to get always_mask boolean with `netCDF4.Dataset(file_nc).variables[varn].always_mask`, but this seems to be always True for FM mapfiles
if '_FillValue' in ds[varn].encoding:
continue
dtype_str = ds[varn].dtype.str[1:]
if dtype_str not in default_fillvals.keys():
continue
varn_fillval = default_fillvals[dtype_str]
ds[varn] = ds[varn].assign_attrs({'_FillValue':varn_fillval})
nfillattrs_added += 1
print(f'[default_fillvals decoded for {nfillattrs_added} variables] ',end='')

#decode the dataset with newly added _FillValue attrs again
ds = xr.decode_cf(ds)
return ds


def open_partitioned_dataset(file_nc, decode_fillvals=False, remove_edges=True, remove_ghost=True, **kwargs):
"""
using xugrid to read and merge partitions, with some additional features (remaning old layerdim, timings, set zcc/zw as data_vars)
Expand Down Expand Up @@ -166,7 +192,7 @@ def open_partitioned_dataset(file_nc, remove_ghost=True, **kwargs):
"""
#TODO: FM-mapfiles contain wgs84/projected_coordinate_system variables. xugrid has .crs property, projected_coordinate_system/wgs84 should be updated to be crs so it will be automatically handled? >> make dflowfm issue (and https://github.com/Deltares/xugrid/issues/42)
#TODO: add support for multiple grids via keyword? GTSM+riv grid also only contains only one grid, so no testcase available
#TODO: add support for multiple grids via keyword? https://github.com/Deltares/dfm_tools/issues/497
#TODO: speed up open_dataset https://github.com/Deltares/dfm_tools/issues/225 (also remove_ghost)

if 'chunks' not in kwargs:
Expand All @@ -181,9 +207,13 @@ def open_partitioned_dataset(file_nc, remove_ghost=True, **kwargs):
for iF, file_nc_one in enumerate(file_nc_list):
print(iF+1,end=' ')
ds = xr.open_dataset(file_nc_one, **kwargs)
ds = remove_unassociated_edges(ds)
if 'nFlowElem' in ds.dims and 'nNetElem' in ds.dims: #for mapformat1 mapfiles: merge different face dimensions (rename nFlowElem to nNetElem) to make sure the dataset topology is correct
if decode_fillvals:
ds = decode_default_fillvals(ds)
if remove_edges:
ds = remove_unassociated_edges(ds)
if 'nFlowElem' in ds.dims and 'nNetElem' in ds.dims:
print('[mapformat1] ',end='')
#for mapformat1 mapfiles: merge different face dimensions (rename nFlowElem to nNetElem) to make sure the dataset topology is correct
ds = ds.rename({'nFlowElem':'nNetElem'})
uds = xu.core.wrap.UgridDataset(ds)
if remove_ghost: #TODO: this makes it way slower (at least for GTSM, although merging seems faster), but is necessary since values on overlapping cells are not always identical (eg in case of Venice ucmag)
Expand Down
1 change: 1 addition & 0 deletions docs/whats-new.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
## Unreleased

### Feat
- decoding of default fillvalues with `dfmt.decode_default_fillvals()` by [@veenstrajelmer](https://github.com/veenstrajelmer) in [#496](https://github.com/Deltares/dfm_tools/pull/496)
- interpolation of edge variables to faces with `dfmt.uda_edges_to_faces()` by [@veenstrajelmer](https://github.com/veenstrajelmer) in [#482](https://github.com/Deltares/dfm_tools/pull/482)
- interpolation of variables on layer interfaces to layer centers with `dfmt.uda_interfaces_to_centers()` by [@veenstrajelmer](https://github.com/veenstrajelmer) in [#482](https://github.com/Deltares/dfm_tools/pull/482)
- generate polyline only for open boundary with `dfmt.generate_bndpli_cutland()` (deprecates `dfmt.generate_bndpli()`) by [@veenstrajelmer](https://github.com/veenstrajelmer) in [#466](https://github.com/Deltares/dfm_tools/pull/466)
Expand Down
49 changes: 49 additions & 0 deletions tests/test_external_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,52 @@ def test_xarray_interp_to_newdim():
assert (interp_with_floats.isnull()==interp_with_da_existing.isnull()).all() #success
assert (interp_with_floats.isnull()==interp_with_da_newdim.isnull()).all() #fails with scipy>=1.10.0


@pytest.mark.unittest
def test_xarray_decode_default_fillvals():
"""
This test will fail as soon as xarray handles default fillvalues: https://github.com/Deltares/dfm_tools/issues/490
After that, the minimum xarray requirement can be updated
However, py38 support must then be dropped: https://github.com/Deltares/dfm_tools/issues/267
In that case, this testcase and `dfmt.decode_default_fillvals()` can be removed
"""

import dfm_tools as dfmt
import xarray as xr
from netCDF4 import default_fillvals

file_nc = dfmt.data.fm_grevelingen_map(return_filepath=True)
file_nc = file_nc.replace('_0*','_0002')

ds = xr.open_dataset(file_nc,decode_cf=False)

#convert fillvalue in fnc to default fillvalue
varn_fnc = 'mesh2d_face_nodes'
fnc_dtype = ds[varn_fnc].dtype.str[1:]
fill_value = ds[varn_fnc].attrs['_FillValue']
fill_value_default = default_fillvals[fnc_dtype]
ds[varn_fnc].attrs.pop('_FillValue')
ds[varn_fnc] = ds[varn_fnc].where(ds[varn_fnc]!=fill_value,fill_value_default)

#write file
file_out = 'fnc_default_fillvals_map.nc'
ds.to_netcdf(file_out)
ds.close()

#open dataset with decode_fillvals
try:
uds = dfmt.open_partitioned_dataset(file_out,decode_fillvals=False)
except Exception as e:
# this raises "ValueError: connectivity contains negative values"
# until xarray handles default fillvalues: https://github.com/Deltares/dfm_tools/issues/490
assert isinstance(e,ValueError)
if 'uds' in locals():
raise Exception("apparently xarray now decodes default fillvalues, so "
"`dfmt.decode_default_fillvals()` can be removed if minimum xarray "
"version is set as requirement")

#this should be successful
uds = dfmt.open_partitioned_dataset(file_out,decode_fillvals=True)
fnc_new = uds.grid.face_node_connectivity

assert fill_value_default in fnc_new

0 comments on commit d3bcc0b

Please sign in to comment.