diff --git a/dfm_tools/get_nc.py b/dfm_tools/get_nc.py index 7fff9f2b7..1b085c19a 100644 --- a/dfm_tools/get_nc.py +++ b/dfm_tools/get_nc.py @@ -39,8 +39,7 @@ import xarray as xr import matplotlib.pyplot as plt from dfm_tools.xarray_helpers import Dataset_varswithdim -from dfm_tools.xugrid_helpers import get_vertical_dimensions -import netCDF4 +from dfm_tools.xugrid_helpers import get_vertical_dimensions, decode_default_fillvals def calc_dist_pythagoras(x1,x2,y1,y2): @@ -276,9 +275,11 @@ def reconstruct_zw_zcc_fromzsigma(uds): reconstruct full grid output (time/face-varying z-values) for zsigmavalue model without full grid output. Implemented in https://issuetracker.deltares.nl/browse/UNST-5477 based on https://cfconventions.org/cf-conventions/cf-conventions.html#_ocean_sigma_over_z_coordinate """ - #TODO: center values are clipped to bedlevel, so the center values of the bottom layer are currently incorrect - #TODO: default fillvalues are not automatically parsed to nan, so doing it manually: https://github.com/pydata/xarray/issues/2742 - fillvals = netCDF4.default_fillvals + # TODO: center values are clipped to bedlevel, so the center values of the bottom layer are currently incorrect + + # temporarily decode default fillvalues + # TODO: xarray only decodes explicitly set fillvalues: https://github.com/Deltares/dfm_tools/issues/490 + uds = xu.UgridDataset(decode_default_fillvals(uds.obj),grids=[uds.grid]) osz_formulaterms_int_dict = get_formula_terms(uds,varn_contains='interface') osz_formulaterms_lay_dict = get_formula_terms(uds,varn_contains='layer') @@ -287,13 +288,9 @@ def reconstruct_zw_zcc_fromzsigma(uds): uds_depth = uds[osz_formulaterms_int_dict['depth']] #mesh2d_bldepth: positive version of mesh2d_flowelem_bl, but this one is always in file uds_depth_c = uds[osz_formulaterms_int_dict['depth_c']] #mesh2d_sigmazdepth uds_zlev_int = uds[osz_formulaterms_int_dict['zlev']] #mesh2d_interface_z - uds_zlev_int = uds_zlev_int.where(uds_zlev_int!=fillvals['f8']) uds_sigma_int = uds[osz_formulaterms_int_dict['sigma']] #mesh2d_interface_sigma - uds_sigma_int = uds_sigma_int.where(uds_sigma_int!=fillvals['f8']) uds_zlev_lay = uds[osz_formulaterms_lay_dict['zlev']] #mesh2d_layer_z - uds_zlev_lay = uds_zlev_lay.where(uds_zlev_lay!=fillvals['f8']) uds_sigma_lay = uds[osz_formulaterms_lay_dict['sigma']] #mesh2d_layer_sigma - uds_sigma_lay = uds_sigma_lay.where(uds_sigma_lay!=fillvals['f8']) # for levels k where sigma(k) has a defined value and zlev(k) is not defined: # z(n,k,j,i) = eta(n,j,i) + sigma(k)*(min(depth_c,depth(j,i))+eta(n,j,i)) diff --git a/dfm_tools/xugrid_helpers.py b/dfm_tools/xugrid_helpers.py index 4ba3604a8..4385851c4 100644 --- a/dfm_tools/xugrid_helpers.py +++ b/dfm_tools/xugrid_helpers.py @@ -130,7 +130,33 @@ def remove_unassociated_edges(ds: xr.Dataset, topology: str = None) -> xr.Datase return ds -def open_partitioned_dataset(file_nc, remove_ghost=True, **kwargs): +def decode_default_fillvals(ds): + """ + xarray only supports explicitly set _FillValue attrs, and therefore ignores the default netCDF4 fillvalue + This function adds the default fillvalue as _FillValue attribute and decodes the dataset again. + """ + # TODO: this function can be removed when xarray does it automatically: https://github.com/Deltares/dfm_tools/issues/490 + + from netCDF4 import default_fillvals + nfillattrs_added = 0 + for varn in ds.variables: + # TODO: possible to get always_mask boolean with `netCDF4.Dataset(file_nc).variables[varn].always_mask`, but this seems to be always True for FM mapfiles + if '_FillValue' in ds[varn].encoding: + continue + dtype_str = ds[varn].dtype.str[1:] + if dtype_str not in default_fillvals.keys(): + continue + varn_fillval = default_fillvals[dtype_str] + ds[varn] = ds[varn].assign_attrs({'_FillValue':varn_fillval}) + nfillattrs_added += 1 + print(f'[default_fillvals decoded for {nfillattrs_added} variables] ',end='') + + #decode the dataset with newly added _FillValue attrs again + ds = xr.decode_cf(ds) + return ds + + +def open_partitioned_dataset(file_nc, decode_fillvals=False, remove_edges=True, remove_ghost=True, **kwargs): """ using xugrid to read and merge partitions, with some additional features (remaning old layerdim, timings, set zcc/zw as data_vars) @@ -166,7 +192,7 @@ def open_partitioned_dataset(file_nc, remove_ghost=True, **kwargs): """ #TODO: FM-mapfiles contain wgs84/projected_coordinate_system variables. xugrid has .crs property, projected_coordinate_system/wgs84 should be updated to be crs so it will be automatically handled? >> make dflowfm issue (and https://github.com/Deltares/xugrid/issues/42) - #TODO: add support for multiple grids via keyword? GTSM+riv grid also only contains only one grid, so no testcase available + #TODO: add support for multiple grids via keyword? https://github.com/Deltares/dfm_tools/issues/497 #TODO: speed up open_dataset https://github.com/Deltares/dfm_tools/issues/225 (also remove_ghost) if 'chunks' not in kwargs: @@ -181,9 +207,13 @@ def open_partitioned_dataset(file_nc, remove_ghost=True, **kwargs): for iF, file_nc_one in enumerate(file_nc_list): print(iF+1,end=' ') ds = xr.open_dataset(file_nc_one, **kwargs) - ds = remove_unassociated_edges(ds) - if 'nFlowElem' in ds.dims and 'nNetElem' in ds.dims: #for mapformat1 mapfiles: merge different face dimensions (rename nFlowElem to nNetElem) to make sure the dataset topology is correct + if decode_fillvals: + ds = decode_default_fillvals(ds) + if remove_edges: + ds = remove_unassociated_edges(ds) + if 'nFlowElem' in ds.dims and 'nNetElem' in ds.dims: print('[mapformat1] ',end='') + #for mapformat1 mapfiles: merge different face dimensions (rename nFlowElem to nNetElem) to make sure the dataset topology is correct ds = ds.rename({'nFlowElem':'nNetElem'}) uds = xu.core.wrap.UgridDataset(ds) if remove_ghost: #TODO: this makes it way slower (at least for GTSM, although merging seems faster), but is necessary since values on overlapping cells are not always identical (eg in case of Venice ucmag) diff --git a/docs/whats-new.md b/docs/whats-new.md index d8660d40f..8dbb845e1 100644 --- a/docs/whats-new.md +++ b/docs/whats-new.md @@ -1,6 +1,7 @@ ## Unreleased ### Feat +- decoding of default fillvalues with `dfmt.decode_default_fillvals()` by [@veenstrajelmer](https://github.com/veenstrajelmer) in [#496](https://github.com/Deltares/dfm_tools/pull/496) - interpolation of edge variables to faces with `dfmt.uda_edges_to_faces()` by [@veenstrajelmer](https://github.com/veenstrajelmer) in [#482](https://github.com/Deltares/dfm_tools/pull/482) - interpolation of variables on layer interfaces to layer centers with `dfmt.uda_interfaces_to_centers()` by [@veenstrajelmer](https://github.com/veenstrajelmer) in [#482](https://github.com/Deltares/dfm_tools/pull/482) - generate polyline only for open boundary with `dfmt.generate_bndpli_cutland()` (deprecates `dfmt.generate_bndpli()`) by [@veenstrajelmer](https://github.com/veenstrajelmer) in [#466](https://github.com/Deltares/dfm_tools/pull/466) diff --git a/tests/test_external_packages.py b/tests/test_external_packages.py index 891770ffc..97cac1205 100644 --- a/tests/test_external_packages.py +++ b/tests/test_external_packages.py @@ -101,3 +101,52 @@ def test_xarray_interp_to_newdim(): assert (interp_with_floats.isnull()==interp_with_da_existing.isnull()).all() #success assert (interp_with_floats.isnull()==interp_with_da_newdim.isnull()).all() #fails with scipy>=1.10.0 + +@pytest.mark.unittest +def test_xarray_decode_default_fillvals(): + """ + This test will fail as soon as xarray handles default fillvalues: https://github.com/Deltares/dfm_tools/issues/490 + After that, the minimum xarray requirement can be updated + However, py38 support must then be dropped: https://github.com/Deltares/dfm_tools/issues/267 + In that case, this testcase and `dfmt.decode_default_fillvals()` can be removed + """ + + import dfm_tools as dfmt + import xarray as xr + from netCDF4 import default_fillvals + + file_nc = dfmt.data.fm_grevelingen_map(return_filepath=True) + file_nc = file_nc.replace('_0*','_0002') + + ds = xr.open_dataset(file_nc,decode_cf=False) + + #convert fillvalue in fnc to default fillvalue + varn_fnc = 'mesh2d_face_nodes' + fnc_dtype = ds[varn_fnc].dtype.str[1:] + fill_value = ds[varn_fnc].attrs['_FillValue'] + fill_value_default = default_fillvals[fnc_dtype] + ds[varn_fnc].attrs.pop('_FillValue') + ds[varn_fnc] = ds[varn_fnc].where(ds[varn_fnc]!=fill_value,fill_value_default) + + #write file + file_out = 'fnc_default_fillvals_map.nc' + ds.to_netcdf(file_out) + ds.close() + + #open dataset with decode_fillvals + try: + uds = dfmt.open_partitioned_dataset(file_out,decode_fillvals=False) + except Exception as e: + # this raises "ValueError: connectivity contains negative values" + # until xarray handles default fillvalues: https://github.com/Deltares/dfm_tools/issues/490 + assert isinstance(e,ValueError) + if 'uds' in locals(): + raise Exception("apparently xarray now decodes default fillvalues, so " + "`dfmt.decode_default_fillvals()` can be removed if minimum xarray " + "version is set as requirement") + + #this should be successful + uds = dfmt.open_partitioned_dataset(file_out,decode_fillvals=True) + fnc_new = uds.grid.face_node_connectivity + + assert fill_value_default in fnc_new