diff --git a/parcels/_datasets/__init__.py b/parcels/_datasets/__init__.py index c1270f0017..29db98c5fe 100644 --- a/parcels/_datasets/__init__.py +++ b/parcels/_datasets/__init__.py @@ -3,7 +3,7 @@ This subpackage uses xarray to generate *idealised* structured and unstructured hydrodynamical datasets that are compatible with Parcels. The goals are three-fold: -1. To provide users with documentation for the types of datasets they can expect Parcels to work with. +1. To provide users with documentation for the types of datasets they can expect Parcels to work with. When reporting bugs, users can use these datasets to reproduce the bug they're experiencing (allowing developers to quickly troubleshoot the problem). 2. To supply our tutorials with hydrodynamical datasets. 3. To offer developers datasets for use in test cases. @@ -36,8 +36,10 @@ This subpackage is broken down into structured and unstructured parts. Each of these have common submodules: -* ``circulation_model`` -> hardcoded datasets with the intention of mimicking datasets from a certain (ocean) circulation model +* ``circulation_model`` -> hardcoded datasets with the intention of mimicking dataset structure from a certain (ocean) circulation model. If you'd like to see Parcel support a new model, please open an issue in our issue tracker. + * exposes a dict ``datasets`` mapping dataset names to xarray datasets * ``generic`` -> hardcoded datasets that are generic, and not tied to a certain (ocean) circulation model. Instead these focus on the fundamental properties of the dataset + * exposes a dict ``datasets`` mapping dataset names to xarray datasets * ``generated`` -> functions to generate datasets with varying properties * ``utils`` -> any utility functions necessary related to either generating or validating datasets diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py new file mode 100644 index 0000000000..d34bcdc058 --- /dev/null +++ b/parcels/_datasets/structured/circulation_models.py @@ -0,0 +1,1144 @@ +"""Datasets mimicking the layout of real-world hydrodynamic models""" + +import numpy as np +import xarray as xr + +from . import T, X, Y, Z + +__all__ = ["T", "X", "Y", "Z", "datasets"] + +TIME = xr.date_range("2000", "2001", T) + + +def _copernicusmarine(): + """Copernicus Marine Service dataset as retrieved by the `copernicusmarine` toolkit""" + return xr.Dataset( + { + "uo": ( + ["time", "depth", "latitude", "longitude"], + np.random.rand(T, Z, Y, X), + { + "valid_max": 5.0, + "unit_long": "Meters per second", + "units": "m s-1", + "long_name": "Eastward velocity", + "standard_name": "eastward_sea_water_velocity", + "valid_min": -5.0, + }, + ), + "vo": ( + ["time", "depth", "latitude", "longitude"], + np.random.rand(T, Z, Y, X), + { + "valid_max": 5.0, + "unit_long": "Meters per second", + "units": "m s-1", + "long_name": "Northward velocity", + "standard_name": "northward_sea_water_velocity", + "valid_min": -5.0, + }, + ), + }, + coords={ + "depth": ( + ["depth"], + np.linspace(0.49, 5727.92, Z), + { + "unit_long": "Meters", + "units": "m", + "axis": "Z", + "long_name": "depth", + "standard_name": "depth", + "positive": "down", + }, + ), + "latitude": ( + ["latitude"], + np.linspace(-90, 90, Y), + { + "unit_long": "Degrees North", + "units": "degrees_north", + "axis": "Y", + "long_name": "Latitude", + "standard_name": "latitude", + }, + ), + "longitude": ( + ["longitude"], + np.linspace(-180, 180, X), + { + "unit_long": "Degrees East", + "units": "degrees_east", + "axis": "X", + "long_name": "Longitude", + "standard_name": "longitude", + }, + ), + "time": ( + ["time"], + TIME, + { + "unit_long": "Hours Since 1950-01-01", + "axis": "T", + "long_name": "Time", + "standard_name": "time", + }, + ), + }, + ) + + +def _copernicusmarine_globcurrents(): + """Copernicus Marine Service GlobCurrent dataset (MULTIOBS_GLO_PHY_MYNRT_015_003)""" + return xr.Dataset( + { + "ue": ( + ["time", "depth", "latitude", "longitude"], + np.random.rand(T, Z, Y, X), + { + "units": "m/s", + "standard_name": "eastward_sea_water_velocity_due_to_ekman_drift", + "long_name": "Depth Ekman driven velocity : zonal component", + "grid_mapping": "crs", + }, + ), + "ve": ( + ["time", "depth", "latitude", "longitude"], + np.random.rand(T, Z, Y, X), + { + "units": "m/s", + "standard_name": "northward_sea_water_velocity_due_to_ekman_drift", + "long_name": "Depth Ekman driven velocity : meridional component", + "grid_mapping": "crs", + }, + ), + }, + coords={ + "depth": ( + ["depth"], + np.linspace(-0.0, 15, Z), + { + "standard_name": "depth", + "long_name": "Depth", + "units": "m", + "axis": "Z", + "positive": "down", + }, + ), + "latitude": ( + ["latitude"], + np.linspace(-90, 90, Y), + { + "unit_long": "Degrees North", + "units": "degrees_north", + "axis": "Y", + "long_name": "Latitude", + "standard_name": "latitude", + }, + ), + "longitude": ( + ["longitude"], + np.linspace(-180, 180, X), + { + "unit_long": "Degrees East", + "units": "degrees_east", + "axis": "X", + "long_name": "Longitude", + "standard_name": "longitude", + }, + ), + "time": ( + ["time"], + TIME, + { + "axis": "T", + "long_name": "Time", + "standard_name": "time", + }, + ), + }, + ) + + +def _NEMO_MOI_U(): + """NEMO model dataset (U component) as serviced by Mercator Ocean International""" + return xr.Dataset( + { + "vozocrtx": ( + ["deptht", "y", "x"], + np.random.rand(Z, Y, X), + { + "units": "m s-1", + "valid_min": -10.0, + "valid_max": 10.0, + "long_name": "Zonal velocity", + "unit_long": "Meters per second", + "standard_name": "sea_water_x_velocity", + "short_name": "vozocrtx", + "online_operation": "N/A", + "interval_operation": 86400, + "interval_write": 86400, + "associate": "time_counter deptht nav_lat nav_lon", + }, + ), + "sotkeavmu1": ( + ["y", "x"], + np.random.rand(Y, X), + { + "units": "m2 s-1", + "valid_min": 0.0, + "valid_max": 100.0, + "long_name": "Vertical Eddy Viscosity U 1m", + "standard_name": "ocean_vertical_eddy_viscosity_u_1m", + "short_name": "sotkeavmu1", + "online_operation": "N/A", + "interval_operation": 86400, + "interval_write": 86400, + "associate": "time_counter nav_lat nav_lon", + }, + ), + }, + coords={ + "nav_lon": ( + ["y", "x"], + np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), # note that this is not curvilinear + { + "units": "degrees_east", + "valid_min": -179.99984754002182, + "valid_max": 179.999842386314, + "long_name": "Longitude", + "nav_model": "Default grid", + "standard_name": "longitude", + }, + ), + "nav_lat": ( + ["y", "x"], + np.tile(np.linspace(-75, 85, Y).reshape(-1, 1), (1, X)), # note that this is not curvilinear + { + "units": "degrees_north", + "valid_min": -77.0104751586914, + "valid_max": 89.9591064453125, + "long_name": "Latitude", + "nav_model": "Default grid", + "standard_name": "latitude", + }, + ), + "x": ( + ["x"], + np.arange(X, dtype="int32"), + { + "standard_name": "projection_x_coordinate", + "axis": "X", + "units": "1", + }, + ), + "y": ( + ["y"], + np.arange(Y, dtype="int32"), + { + "standard_name": "projection_y_coordinate", + "axis": "Y", + "units": "1", + }, + ), + "time_counter": ( + ["time_counter"], + np.empty(0, dtype="datetime64[ns]"), + { + "standard_name": "time", + "long_name": "Time axis", + "axis": "T", + "time_origin": "1950-JAN-01 00:00:00", + }, + ), + "deptht": ( + ["deptht"], + np.linspace(1, 5500, Z, dtype="float64"), + { + "units": "m", + "positive": "down", + "valid_min": 0.4940253794193268, + "valid_max": 5727.91650390625, + "long_name": "Vertical T levels", + "standard_name": "depth", + "axis": "Z", + }, + ), + }, + ) + + +def _NEMO_MOI_V(): + """NEMO model dataset (V component) as serviced by Mercator Ocean International""" + return xr.Dataset( + { + "vomecrty": ( + ["deptht", "y", "x"], + np.random.rand(Z, Y, X), + { + "units": "m s-1", + "valid_min": -10.0, + "valid_max": 10.0, + "long_name": "Meridional velocity", + "unit_long": "Meters per second", + "standard_name": "sea_water_y_velocity", + "short_name": "vomecrty", + "online_operation": "N/A", + "interval_operation": 86400, + "interval_write": 86400, + "associate": "time_counter deptht nav_lat nav_lon", + }, + ), + }, + coords={ + "nav_lon": ( + ["y", "x"], + np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), # note that this is not curvilinear + { + "units": "degrees_east", + "valid_min": -179.99984754002182, + "valid_max": 179.999842386314, + "long_name": "Longitude", + "nav_model": "Default grid", + "standard_name": "longitude", + }, + ), + "nav_lat": ( + ["y", "x"], + np.tile(np.linspace(-75, 85, Y).reshape(-1, 1), (1, X)), # note that this is not curvilinear + { + "units": "degrees_north", + "valid_min": -77.0104751586914, + "valid_max": 89.9591064453125, + "long_name": "Latitude", + "nav_model": "Default grid", + "standard_name": "latitude", + }, + ), + "x": ( + ["x"], + np.arange(X, dtype="int32"), + { + "standard_name": "projection_x_coordinate", + "axis": "X", + "units": "1", + }, + ), + "y": ( + ["y"], + np.arange(Y, dtype="int32"), + { + "standard_name": "projection_y_coordinate", + "axis": "Y", + "units": "1", + }, + ), + "time_counter": ( + ["time_counter"], + np.empty(0, dtype="datetime64[ns]"), + { + "standard_name": "time", + "long_name": "Time axis", + "axis": "T", + "time_origin": "1950-JAN-01 00:00:00", + }, + ), + "deptht": ( + ["deptht"], + np.linspace(1, 5500, Z, dtype="float64"), + { + "units": "m", + "positive": "down", + "valid_min": 0.4940253794193268, + "valid_max": 5727.91650390625, + "long_name": "Vertical T levels", + "standard_name": "depth", + "axis": "Z", + }, + ), + }, + ) + + +def _CESM(): + """CESM model dataset""" + return ( + xr.Dataset( + { + "UVEL": ( + ["time", "z_t", "nlat", "nlon"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "long_name": "Velocity in grid-x direction", + "units": "centimeter/s", + "grid_loc": 3221, + "cell_methods": "time:mean", + }, + ), + "VVEL": ( + ["time", "z_t", "nlat", "nlon"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "long_name": "Velocity in grid-y direction", + "units": "centimeter/s", + "grid_loc": 3221, + "cell_methods": "time:mean", + }, + ), + "WVEL": ( + ["time", "z_w_top", "nlat", "nlon"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "long_name": "Vertical Velocity", + "units": "centimeter/s", + "grid_loc": 3112, + "cell_methods": "time:mean", + }, + ), + }, + coords={ + "time": ( + ["time"], + TIME, + { + "long_name": "time", + "bounds": "time_bounds", + }, + ), + "z_t": ( + ["z_t"], + np.linspace(0, 5000, Z, dtype="float32"), + { + "long_name": "depth from surface to midpoint of layer", + "units": "centimeters", + "positive": "down", + "valid_min": 500.0, + "valid_max": 537500.0, + }, + ), + "z_w_top": ( + ["z_w_top"], + np.linspace(0, 5000, Z, dtype="float32"), + { + "long_name": "depth from surface to top of layer", + "units": "centimeters", + "positive": "down", + "valid_min": 0.0, + "valid_max": 525000.94, + }, + ), + "ULONG": ( + ["nlat", "nlon"], + np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), # note that this is not curvilinear + { + "long_name": "array of u-grid longitudes", + "units": "degrees_east", + }, + ), + "ULAT": ( + ["nlat", "nlon"], + np.tile(np.linspace(-75, 85, Y).reshape(-1, 1), (1, X)), # note that this is not curvilinear + { + "long_name": "array of u-grid latitudes", + "units": "degrees_north", + }, + ), + }, + ), + ) + + +def _MITgcm_netcdf(): + """MITgcm model dataset in netCDF format""" + return xr.Dataset( + # + { + "U": ( + ["T", "Z", "Y", "Xp1"], + np.random.rand(T, Z, Y, X + 1).astype("float32"), + { + "units": "m/s", + "coordinates": "XU YU RC iter", + }, + ), + "V": ( + ["T", "Z", "Yp1", "X"], + np.random.rand(T, Z, Y + 1, X).astype("float32"), + { + "units": "m/s", + "coordinates": "XV YV RC iter", + }, + ), + "W": ( + ["T", "Zl", "Y", "X"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "units": "m/s", + "coordinates": "XC YC RC iter", + }, + ), + "Temp": ( + ["T", "Z", "Y", "X"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "units": "degC", + "coordinates": "XC YC RC iter", + "long_name": "potential_temperature", + }, + ), + }, + coords={ + "T": ( + ["T"], + np.arange(0, T, dtype="float64"), + { + "long_name": "model_time", + "units": "s", + }, + ), + "Z": ( + ["Z"], + np.linspace(-25, -5000, Z, dtype="float64"), + { + "long_name": "vertical coordinate of cell center", + "units": "meters", + "positive": "up", + }, + ), + "Zl": ( + ["Zl"], + np.linspace(0, -4500, Z, dtype="float64"), + { + "long_name": "vertical coordinate of upper cell interface", + "units": "meters", + "positive": "up", + }, + ), + "Y": ( + ["Y"], + np.linspace(500, 5000, Y, dtype="float64"), + { + "long_name": "Y-Coordinate of cell center", + "units": "meters", + }, + ), + "Yp1": ( + ["Yp1"], + np.linspace(0, 4500, Y + 1, dtype="float64"), + { + "long_name": "Y-Coordinate of cell corner", + "units": "meters", + }, + ), + "X": ( + ["X"], + np.linspace(500, 5000, X, dtype="float64"), + { + "long_name": "X-coordinate of cell center", + "units": "meters", + }, + ), + "Xp1": ( + ["Xp1"], + np.linspace(0, 4100, X + 1, dtype="float64"), + { + "long_name": "X-Coordinate of cell corner", + "units": "meters", + }, + ), + }, + ) + + +def _ERA5_wind(): + """ERA5 10m wind model dataset""" + return xr.Dataset( + { + "u10": ( + ["time", "latitude", "longitude"], + np.random.rand(T, Y, X).astype("float32"), + { + "long_name": "10 metre U wind component", + "units": "m s**-1", + }, + ), + "v10": ( + ["time", "latitude", "longitude"], + np.random.rand(T, Y, X).astype("float32"), + { + "long_name": "10 metre V wind component", + "units": "m s**-1", + }, + ), + }, + coords={ + "time": ( + ["time"], + TIME, + { + "long_name": "time", + }, + ), + "latitude": ( + ["latitude"], + np.linspace(90, -90, Y), # Note: ERA5 uses latitudes from 90 to -90 + { + "long_name": "latitude", + "units": "degrees_north", + }, + ), + "longitude": ( + ["longitude"], + np.linspace(0, 360, X, endpoint=False), + { + "long_name": "longitude", + "units": "degrees_east", + }, + ), + }, + ) + + +def _FES_tides(): + """FES tidal model dataset""" + return xr.Dataset( + { + "Ug": ( + ["lat", "lon"], + np.random.rand(Y, X).astype("float32"), + { + "long_name": "Eastward sea water velocity phaselag due to non equilibrium ocean tide at m2 frequency", + "units": "degrees", + "grid_mapping": "crs", + }, + ), + "Ua": ( + ["lat", "lon"], + np.random.rand(Y, X).astype("float32"), + { + "long_name": "Eastward sea water velocity amplitude due to non equilibrium ocean tide at m2 frequency", + "units": "cm/s", + "grid_mapping": "crs", + }, + ), + }, + coords={ + "lat": ( + ["lat"], + np.linspace(-90, 90, Y), + { + "long_name": "latitude", + "units": "degrees_north", + "bounds": "lat_bnds", + "axis": "Y", + "valid_min": -90.0, + "valid_max": 90.0, + }, + ), + "lon": ( + ["lon"], + np.linspace(0, 360, X, endpoint=False), + { + "long_name": "longitude", + "units": "degrees_east", + "bounds": "lon_bnds", + "axis": "X", + "valid_min": 0.0, + "valid_max": 360.0, + }, + ), + }, + ) + + +def _hycom_espc(): + """HYCOM ESPC model dataset from https://data.hycom.org/datasets/ESPC-D-V02/data/daily_netcdf/2025/""" + return xr.Dataset( + { + "water_u": ( + ["time", "depth", "lat", "lon"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "long_name": "Eastward Water Velocity", + "standard_name": "eastward_sea_water_velocity", + "units": "m/s", + "NAVO_code": 17, + "actual_range": [-3.3700001, 3.6840003], + "cell_methods": "time: mean", + }, + ), + "tau": ( + ["time"], + np.linspace(0, 24, T, dtype="float64"), + { + "long_name": "Tau", + "units": "hours since analysis", + "time_origin": "2024-12-31 12:00:00", + "NAVO_code": 56, + "cell_methods": "time: mean", + }, + ), + }, + coords={ + "time": ( + ["time"], + np.arange(0, T, dtype="float64"), + { + "long_name": "Valid Time", + "units": "hours since 2000-01-01 00:00:00", + "time_origin": "2000-01-01 00:00:00", + "calendar": "standard", + "axis": "T", + "NAVO_code": 13, + "cell_methods": "time: mean", + }, + ), + "depth": ( + ["depth"], + np.linspace(0, 5000, Z, dtype="float32"), + { + "long_name": "Depth", + "standard_name": "depth", + "units": "m", + "positive": "down", + "axis": "Z", + "NAVO_code": 5, + }, + ), + "lat": ( + ["lat"], + np.linspace(-80, 90, Y), + { + "long_name": "Latitude", + "standard_name": "latitude", + "units": "degrees_north", + "point_spacing": "even", + "axis": "Y", + "NAVO_code": 1, + }, + ), + "lon": ( + ["lon"], + np.linspace(0, 360, X, endpoint=False), + { + "long_name": "Longitude", + "standard_name": "longitude", + "units": "degrees_east", + "modulo": "360 degrees", + "axis": "X", + "NAVO_code": 2, + }, + ), + }, + ) + + +def _ecco4(): + """ECCO V4r4 model dataset (from https://podaac.jpl.nasa.gov/dataset/ECCO_L4_OCEAN_VEL_LLC0090GRID_DAILY_V4R4#capability-modal-download)""" + tiles = 13 + lon_grid = np.tile( + np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), (tiles, 1, 1) + ) # NOTE this grid is not correct, as duplicates for each tile + lat_grid = np.tile( + np.tile(np.linspace(-89, 89, Y), (X, 1)).T, (tiles, 1, 1) + ) # NOTE this grid is not correct, as duplicates for each tile + return xr.Dataset( + { + "UVEL": ( + ["time", "k", "tile", "j", "i_g"], + np.random.rand(T, Z, tiles, Y, X).astype("float32"), + { + "long_name": "Horizontal velocity in the model +x direction", + "units": "m s-1", + "mate": "VVEL", + "coverage_content_type": "modelResult", + "direction": ">0 increases volume", + "standard_name": "sea_water_x_velocity", + "comment": "Horizontal velocity in the +x direction at the 'u' face of the tracer cell on the native model grid. Note: in the Arakawa-C grid, horizontal velocities are staggered relative to the tracer cells with indexing such that +UVEL(i_g,j,k) corresponds to +x fluxes through the 'u' face of the tracer cell at (i,j,k). Do NOT use UVEL for volume flux calculations because the model's grid cell thicknesses vary with time (z* coordinates); use UVELMASS instead. Also, the model +x direction does not necessarily correspond to the geographical east-west direction because the x and y axes of the model's curvilinear lat-lon-cap (llc) grid have arbitrary orientations which vary within and across tiles. See EVEL and NVEL for zonal and meridional velocity.", + "valid_min": -2.139253616333008, + "valid_max": 2.038635015487671, + }, + ), + "VVEL": ( + ["time", "k", "tile", "j_g", "i"], + np.random.rand(T, Z, tiles, Y, X).astype("float32"), + { + "long_name": "Horizontal velocity in the model +y direction", + "units": "m s-1", + "mate": "UVEL", + "coverage_content_type": "modelResult", + "direction": ">0 increases volume", + "standard_name": "sea_water_y_velocity", + "comment": "Horizontal velocity in the +y direction at the 'v' face of the tracer cell on the native model grid. Note: in the Arakawa-C grid, horizontal velocities are staggered relative to the tracer cells with indexing such that +VVEL(i,j_g,k) corresponds to +y fluxes through the 'v' face of the tracer cell at (i,j,k). Do NOT use VVEL for volume flux calculations because the model's grid cell thicknesses vary with time (z* coordinates); use VVELMASS instead. Also, the model +y direction does not necessarily correspond to the geographical north-south direction because the x and y axes of the model's curvilinear lat-lon-cap (llc) grid have arbitrary orientations which vary within and across tiles. See EVEL and NVEL for zonal and meridional velocity.", + "valid_min": -1.7877743244171143, + "valid_max": 1.9089667797088623, + }, + ), + "WVEL": ( + ["time", "k_l", "tile", "j", "i"], + np.random.rand(T, Z, tiles, Y, X).astype("float32"), + { + "long_name": "Vertical velocity", + "units": "m s-1", + "coverage_content_type": "modelResult", + "direction": ">0 decreases volume", + "standard_name": "upward_sea_water_velocity", + "comment": "Vertical velocity in the +z direction at the top 'w' face of the tracer cell on the native model grid. Note: in the Arakawa-C grid, vertical velocities are staggered relative to the tracer cells with indexing such that +WVEL(i,j,k_l) corresponds to upward +z motion through the top 'w' face of the tracer cell at (i,j,k). WVEL is identical to WVELMASS.", + "valid_min": -0.0023150660563260317, + "valid_max": 0.0016380994347855449, + }, + ), + }, + coords={ + "time": ( + ["time"], + TIME, + { + "long_name": "center time of averaging period", + "standard_name": "time", + "axis": "T", + "bounds": "time_bnds", + "coverage_content_type": "coordinate", + }, + ), + "tile": ( + ["tile"], + np.arange(tiles, dtype="int32"), + { + "long_name": "lat-lon-cap tile index", + "coverage_content_type": "coordinate", + "comment": "The ECCO V4 horizontal model grid is divided into 13 tiles of 90x90 cells for convenience.", + }, + ), + "k": ( + ["k"], + np.arange(Z, dtype="int32"), + { + "long_name": "grid index in z for tracer variables", + "axis": "Z", + "swap_dim": "Z", + "coverage_content_type": "coordinate", + }, + ), + "k_l": ( + ["k_l"], + np.arange(Z, dtype="int32"), + { + "long_name": "grid index in z corresponding to the top face of tracer grid cells ('w' locations)", + "axis": "Z", + "swap_dim": "Zl", + "coverage_content_type": "coordinate", + "c_grid_axis_shift": -0.5, + "comment": "First index corresponds to the top surface of the uppermost tracer grid cell. The use of 'l' in the variable name follows the MITgcm convention for ocean variables in which the lower (l) face of a tracer grid cell on the logical grid corresponds to the top face of the grid cell on the physical grid.", + }, + ), + "j": ( + ["j"], + np.arange(Y, dtype="int32"), + { + "long_name": "grid index in y for variables at tracer and 'u' locations", + "axis": "Y", + "swap_dim": "YC", + "coverage_content_type": "coordinate", + "comment": "In the Arakawa C-grid system, tracer (e.g., THETA) and 'u' variables (e.g., UVEL) have the same y coordinate on the model grid.", + }, + ), + "j_g": ( + ["j_g"], + np.arange(Y, dtype="int32"), + { + "long_name": "grid index in y for variables at 'v' and 'g' locations", + "axis": "Y", + "swap_dim": "YG", + "c_grid_axis_shift": -0.5, + "coverage_content_type": "coordinate", + "comment": "In the Arakawa C-grid system, 'v' (e.g., VVEL) and 'g' variables (e.g., XG) have the same y coordinate.", + }, + ), + "i": ( + ["i"], + np.arange(X, dtype="int32"), + { + "long_name": "grid index in x for variables at tracer and 'v' locations", + "axis": "X", + "swap_dim": "XC", + "coverage_content_type": "coordinate", + "comment": "In the Arakawa C-grid system, tracer (e.g., THETA) and 'v' variables (e.g., VVEL) have the same x coordinate on the model grid.", + }, + ), + "i_g": ( + ["i_g"], + np.arange(X, dtype="int32"), + { + "long_name": "grid index in x for variables at 'u' and 'g' locations", + "axis": "X", + "swap_dim": "XG", + "c_grid_axis_shift": -0.5, + "coverage_content_type": "coordinate", + "comment": "In the Arakawa C-grid system, 'u' (e.g., UVEL) and 'g' variables (e.g., XG) have the same x coordinate on the model grid.", + }, + ), + "Z": ( + ["k"], + np.linspace(-5, -5900, Z, dtype="float32"), + { + "long_name": "depth of tracer grid cell center", + "standard_name": "depth", + "units": "m", + "positive": "up", + "bounds": "Z_bnds", + "coverage_content_type": "coordinate", + "comment": "Non-uniform vertical spacing.", + }, + ), + "Zl": ( + ["k_l"], + np.linspace(0, -5678, Z, dtype="float32"), + { + "long_name": "depth of the top face of tracer grid cells", + "standard_name": "depth", + "units": "m", + "positive": "up", + "coverage_content_type": "coordinate", + "comment": "First element is 0m, the depth of the top face of the first tracer grid cell (ocean surface). Last element is the depth of the top face of the deepest grid cell. The use of 'l' in the variable name follows the MITgcm convention for ocean variables in which the lower (l) face of a tracer grid cell on the logical grid corresponds to the top face of the grid cell on the physical grid. In other words, the logical vertical grid of MITgcm ocean variables is inverted relative to the physical vertical grid.", + }, + ), + "YC": ( + ["tile", "j", "i"], + lat_grid, + { + "long_name": "latitude of tracer grid cell center", + "standard_name": "latitude", + "units": "degrees_north", + "coordinate": "YC XC", + "bounds": "YC_bnds", + "coverage_content_type": "coordinate", + "comment": "nonuniform grid spacing", + }, + ), + "YG": ( + ["tile", "j_g", "i_g"], + lat_grid, + { + "long_name": "latitude of 'southwest' corner of tracer grid cell", + "standard_name": "latitude", + "units": "degrees_north", + "coordinate": "YG XG", + "coverage_content_type": "coordinate", + "comment": "Nonuniform grid spacing. Note: 'southwest' does not correspond to geographic orientation but is used for convenience to describe the computational grid. See MITgcm documentation for details.", + }, + ), + "XC": ( + ["tile", "j", "i"], + lon_grid, + { + "long_name": "longitude of tracer grid cell center", + "standard_name": "longitude", + "units": "degrees_east", + "coordinate": "YC XC", + "bounds": "XC_bnds", + "coverage_content_type": "coordinate", + "comment": "nonuniform grid spacing", + }, + ), + "XG": ( + ["tile", "j_g", "i_g"], + lon_grid, + { + "long_name": "longitude of 'southwest' corner of tracer grid cell", + "standard_name": "longitude", + "units": "degrees_east", + "coordinate": "YG XG", + "coverage_content_type": "coordinate", + "comment": "Nonuniform grid spacing. Note: 'southwest' does not correspond to geographic orientation but is used for convenience to describe the computational grid. See MITgcm documentation for details.", + }, + ), + }, + ) + + +def _CROCO_idealized(): + """CROCO idealized model dataset""" + return xr.Dataset( + { + "u": ( + ["time", "s_rho", "eta_rho", "xi_u"], + np.random.rand(T, Z, Y, X - 1).astype("float32"), + { + "long_name": "u-momentum component", + "units": "meter second-1", + "field": "u-velocity, scalar, series", + "standard_name": "sea_water_x_velocity_at_u_location", + }, + ), + "v": ( + ["time", "s_rho", "eta_v", "xi_rho"], + np.random.rand(T, Z, Y - 1, X).astype("float32"), + { + "long_name": "v-momentum component", + "units": "meter second-1", + "field": "v-velocity, scalar, series", + "standard_name": "sea_water_y_velocity_at_v_location", + }, + ), + "w": ( + ["time", "s_rho", "eta_rho", "xi_rho"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "long_name": "vertical momentum component", + "units": "meter second-1", + "field": "w-velocity, scalar, series", + "standard_name": "upward_sea_water_velocity", + "coordinates": "lat_rho lon_rho", + }, + ), + "h": ( + ["eta_rho", "xi_rho"], + np.random.rand(Y, X).astype("float32"), + { + "long_name": "bathymetry at RHO-points", + "units": "meter", + "field": "bath, scalar", + "standard_name": "model_sea_floor_depth_below_geoid", + }, + ), + "zeta": ( + ["time", "eta_rho", "xi_rho"], + np.random.rand(T, Y, X).astype("float32"), + { + "long_name": "free-surface", + "units": "meter", + "field": "free_surface, scalar, series", + "standard_name": "sea_surface_height", + }, + ), + "Cs_w": ( + ["s_w"], + np.random.rand(Z + 1).astype("float32"), + { + "long_name": "S-coordinate stretching curves at W-points", + }, + ), + "hc": ( + [], + np.array(0.0, dtype="float32"), + { + "long_name": "S-coordinate parameter, critical depth", + "units": "meter", + }, + ), + }, + coords={ + "time": ( + ["time"], + np.arange(0, T, dtype="float64"), + { + "long_name": "time since initialization", + "units": "second", + "field": "time, scalar, series", + "standard_name": "time", + "axis": "T", + }, + ), + "s_rho": ( + ["s_rho"], + np.linspace(-0.95, 0.05, Z, dtype="float32"), + { + "long_name": "S-coordinate at RHO-points", + "standard_name": "ocean_s_coordinate_g1", + "positive": "up", + "axis": "Z", + "formula_terms": "s: sc_r C: Cs_r eta: zeta depth: h depth_c: hc", + }, + ), + "s_w": ( + ["s_w"], + np.linspace(-1, 0, Z + 1, dtype="float32"), + { + "long_name": "S-coordinate at W-points", + "standard_name": "ocean_s_coordinate_g1_at_w_location", + "positive": "up", + "axis": "Z", + "c_grid_axis_shift": -0.5, + "formula_terms": "s: sc_w C: Cs_w eta: zeta depth: h depth_c: hc", + }, + ), + "eta_rho": ( + ["eta_rho"], + np.arange(Y, dtype="float32"), + { + "long name": "y-dimension of the grid", + "standard_name": "y_grid_index", + "axis": "Y", + "c_grid_dynamic_range": f"2:{Y}", + }, + ), + "eta_v": ( + ["eta_v"], + np.arange(Y - 1, dtype="float32"), + { + "long name": "y-dimension of the grid at v location", + "standard_name": "y_grid_index_at_v_location", + "axis": "Y", + "c_grid_axis_shift": 0.5, + "c_grid_dynamic_range": f"2:{Y - 1}", + }, + ), + "xi_rho": ( + ["xi_rho"], + np.arange(X, dtype="float32"), + { + "long name": "x-dimension of the grid", + "standard_name": "x_grid_index", + "axis": "X", + "c_grid_dynamic_range": f"2:{X}", + }, + ), + "xi_u": ( + ["xi_u"], + np.arange(X - 1, dtype="float32"), + { + "long name": "x-dimension of the grid at u location", + "standard_name": "x_grid_index_at_u_location", + "axis": "X", + "c_grid_axis_shift": 0.5, + "c_grid_dynamic_range": f"2:{X - 1}", + }, + ), + "x_rho": ( + ["eta_rho", "xi_rho"], + np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), # note that this is not curvilinear + { + "long_name": "x-locations of RHO-points", + "units": "meter", + "standard_name": "plane_x_coordinate", + "field": "x_rho, scalar", + }, + ), + "y_rho": ( + ["eta_rho", "xi_rho"], + np.tile(np.linspace(-89, 89, Y), (X, 1)).T, # note that this is not curvilinear + { + "long_name": "y-locations of RHO-points", + "units": "meter", + "standard_name": "plane_y_coordinate", + "field": "y_rho, scalar", + }, + ), + }, + ) + + +datasets = { + "ds_copernicusmarine": _copernicusmarine(), + "ds_copernicusmarine_globcurrents": _copernicusmarine_globcurrents(), + "ds_NEMO_MOI_U": _NEMO_MOI_U(), + "ds_NEMO_MOI_V": _NEMO_MOI_V(), + "ds_CESM": _CESM(), + "ds_MITgcm_netcdf": _MITgcm_netcdf(), + "ds_ERA5_wind": _ERA5_wind(), + "ds_FES_tides": _FES_tides(), + "ds_hycom_espc": _hycom_espc(), + "ds_ecco4": _ecco4(), + "ds_CROCO_idealized": _CROCO_idealized(), +} diff --git a/parcels/_datasets/structured/generic.py b/parcels/_datasets/structured/generic.py index 72f070e384..2432f079c0 100644 --- a/parcels/_datasets/structured/generic.py +++ b/parcels/_datasets/structured/generic.py @@ -1,5 +1,3 @@ -"""Datasets focussing on grid geometry""" - import numpy as np import xarray as xr diff --git a/parcels/_datasets/utils.py b/parcels/_datasets/utils.py index 19c64643d9..0ced45baee 100644 --- a/parcels/_datasets/utils.py +++ b/parcels/_datasets/utils.py @@ -65,3 +65,95 @@ def dataset_repr_diff(ds1: xr.Dataset, ds2: xr.Dataset) -> str: diff = difflib.ndiff(repr1.splitlines(keepends=True), repr2.splitlines(keepends=True)) return "".join(diff) + + +def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2"): + print(f"Comparing {ds1_name} and {ds2_name}\n") + + # Compare dataset attributes + print("Dataset Attributes Comparison:") + if ds1.attrs == ds2.attrs: + print(" Dataset attributes are identical.") + else: + print(" Dataset attributes differ.") + for attr_name in set(ds1.attrs.keys()) | set(ds2.attrs.keys()): + if attr_name not in ds1.attrs: + print(f" Attribute '{attr_name}' only in {ds2_name}") + elif attr_name not in ds2.attrs: + print(f" Attribute '{attr_name}' only in {ds1_name}") + elif ds1.attrs[attr_name] != ds2.attrs[attr_name]: + print(f" Attribute '{attr_name}' differs:") + print(f" {ds1_name}: {ds1.attrs[attr_name]}") + print(f" {ds2_name}: {ds2.attrs[attr_name]}") + print("-" * 30) + + # Compare dimensions + print("Dimensions Comparison:") + ds1_dims = set(ds1.dims) + ds2_dims = set(ds2.dims) + if ds1_dims == ds2_dims: + print(" Dimension names are identical.") + else: + print(" Dimension names differ:") + print(f" {ds1_name} dims: {sorted(list(ds1_dims))}") + print(f" {ds2_name} dims: {sorted(list(ds2_dims))}") + + # For common dimensions, compare order (implicit by comparing coordinate values for sortedness) + # and size (though size is parameterized and expected to be different) + for dim_name in ds1_dims.intersection(ds2_dims): + print(f" Dimension '{dim_name}':") + # Sizes will differ due to DIM_SIZE, so we don't strictly compare them. + print(f" {ds1_name} size: {ds1.dims[dim_name]}, {ds2_name} size: {ds2.dims[dim_name]}") + # Check if coordinates associated with dimensions are sorted (increasing) + if dim_name in ds1.coords and dim_name in ds2.coords: + is_ds1_sorted = np.all(np.diff(ds1[dim_name].values) >= 0) if len(ds1[dim_name].values) > 1 else True + is_ds2_sorted = np.all(np.diff(ds2[dim_name].values) >= 0) if len(ds2[dim_name].values) > 1 else True + if is_ds1_sorted == is_ds2_sorted: + print(f" Order for '{dim_name}' is consistent (both sorted: {is_ds1_sorted})") + else: + print( + f" Order for '{dim_name}' differs: {ds1_name} sorted: {is_ds1_sorted}, {ds2_name} sorted: {is_ds2_sorted}" + ) + print("-" * 30) + + # Compare variables (name, attributes, dimensions used) + print("Variables Comparison:") + ds1_vars = set(ds1.variables.keys()) + ds2_vars = set(ds2.variables.keys()) + + if ds1_vars == ds2_vars: + print(" Variable names are identical.") + else: + print(" Variable names differ:") + print(f" {ds1_name} vars: {sorted(list(ds1_vars - ds2_vars))}") + print(f" {ds2_name} vars: {sorted(list(ds2_vars - ds1_vars))}") + print(f" Common vars: {sorted(list(ds1_vars.intersection(ds2_vars)))}") + + for var_name in ds1_vars.intersection(ds2_vars): + print(f" Variable '{var_name}':") + var1 = ds1[var_name] + var2 = ds2[var_name] + + # Compare attributes + if var1.attrs == var2.attrs: + print(" Attributes are identical.") + else: + print(" Attributes differ.") + for attr_name in set(var1.attrs.keys()) | set(var2.attrs.keys()): + if attr_name not in var1.attrs: + print(f" Attribute '{attr_name}' only in {ds2_name}'s '{var_name}'") + elif attr_name not in var2.attrs: + print(f" Attribute '{attr_name}' only in {ds1_name}'s '{var_name}'") + elif var1.attrs[attr_name] != var2.attrs[attr_name]: + print(f" Attribute '{attr_name}' differs for '{var_name}':") + print(f" {ds1_name}: {var1.attrs[attr_name]}") + print(f" {ds2_name}: {var2.attrs[attr_name]}") + + # Compare dimensions used by the variable + if var1.dims == var2.dims: + print(f" Dimensions used are identical: {var1.dims}") + else: + print(" Dimensions used differ:") + print(f" {ds1_name}: {var1.dims}") + print(f" {ds2_name}: {var2.dims}") + print("=" * 30 + " End of Comparison " + "=" * 30) diff --git a/tests/v4/test_fieldset.py b/tests/v4/test_fieldset.py index be5fc04a41..f623b7f1c4 100644 --- a/tests/v4/test_fieldset.py +++ b/tests/v4/test_fieldset.py @@ -6,6 +6,9 @@ import xarray as xr from parcels import xgcm +from parcels._datasets.structured.circulation_models import ( + datasets as datasets_circulation_models, # noqa: F401 +) # just making sure the import works. Will eventually be used in tests from parcels._datasets.structured.generic import T as T_structured from parcels._datasets.structured.generic import datasets as datasets_structured from parcels.field import Field, VectorField