From 1dd2eef6c96315314c40a796ca69347d34d0bb37 Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Fri, 27 Jun 2025 16:14:12 +0200 Subject: [PATCH 01/24] Adding ds_copernicusmarine to circulation_models dataset Adding a circulation model dataset mimicking the layout of copernicusmarine --- .../structured/circulation_models.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 parcels/_datasets/structured/circulation_models.py diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py new file mode 100644 index 0000000000..2c97afc5d8 --- /dev/null +++ b/parcels/_datasets/structured/circulation_models.py @@ -0,0 +1,27 @@ +"""Datasets mimicking the layout of real-world hydrodynamic models""" + +import numpy as np +import xarray as xr + +from . import T, X, Y, Z + +__all__ = ["T", "X", "Y", "Z", "datasets"] + +TIME = xr.date_range("2000", "2001", T) + + +datasets = { + "ds_copernicusmarine": xr.Dataset( + # Copernicus Marine Service dataset as retrieved by the `copernicusmarine` toolkit + { + "uo": (["depth", "latitude", "longitude", "time"], np.random.rand(Z, Y, X, T)), + "vo": (["depth", "latitude", "longitude", "time"], np.random.rand(Z, Y, X, T)), + }, + coords={ + "depth": (["depth"], np.linspace(0, 5000, Z), {"axis": "Z"}), + "latitude": (["latitude"], np.linspace(-90, 90, Y), {"axis": "Y"}), + "longitude": (["longitude"], np.linspace(-180, 180, X), {"axis": "X"}), + "time": (["time"], TIME, {"axis": "T"}), + }, + ), +} From 47f5b7817031fb9fc59d3d37525448787f9ad6cd Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Fri, 27 Jun 2025 16:17:22 +0200 Subject: [PATCH 02/24] Fixing dimension order in uo and vo --- parcels/_datasets/structured/circulation_models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index 2c97afc5d8..4d6bd0def9 100644 --- 
a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -14,8 +14,8 @@ "ds_copernicusmarine": xr.Dataset( # Copernicus Marine Service dataset as retrieved by the `copernicusmarine` toolkit { - "uo": (["depth", "latitude", "longitude", "time"], np.random.rand(Z, Y, X, T)), - "vo": (["depth", "latitude", "longitude", "time"], np.random.rand(Z, Y, X, T)), + "uo": (["depth", "latitude", "longitude", "time"], np.random.rand(T, Z, Y, X)), + "vo": (["depth", "latitude", "longitude", "time"], np.random.rand(T, Z, Y, X)), }, coords={ "depth": (["depth"], np.linspace(0, 5000, Z), {"axis": "Z"}), From 0f3782e8a42d5922430fd19e8e3bed7fad059cd1 Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Fri, 27 Jun 2025 16:19:32 +0200 Subject: [PATCH 03/24] Updating depth range of copernicusmarine --- parcels/_datasets/structured/circulation_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index 4d6bd0def9..1ac499d2cd 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -18,7 +18,7 @@ "vo": (["depth", "latitude", "longitude", "time"], np.random.rand(T, Z, Y, X)), }, coords={ - "depth": (["depth"], np.linspace(0, 5000, Z), {"axis": "Z"}), + "depth": (["depth"], np.linspace(0.49, 5727.92, Z), {"axis": "Z"}), "latitude": (["latitude"], np.linspace(-90, 90, Y), {"axis": "Y"}), "longitude": (["longitude"], np.linspace(-180, 180, X), {"axis": "X"}), "time": (["time"], TIME, {"axis": "T"}), From a1c73fd9dc3db81ad24941e30c72173f1d3333cd Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Fri, 27 Jun 2025 16:52:15 +0200 Subject: [PATCH 04/24] adding metadata to copernicusmarine coordinates --- .../structured/circulation_models.py | 48 +++++++++++++++++-- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git 
a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index 1ac499d2cd..fadfb9ca70 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -18,10 +18,50 @@ "vo": (["depth", "latitude", "longitude", "time"], np.random.rand(T, Z, Y, X)), }, coords={ - "depth": (["depth"], np.linspace(0.49, 5727.92, Z), {"axis": "Z"}), - "latitude": (["latitude"], np.linspace(-90, 90, Y), {"axis": "Y"}), - "longitude": (["longitude"], np.linspace(-180, 180, X), {"axis": "X"}), - "time": (["time"], TIME, {"axis": "T"}), + "depth": ( + ["depth"], + np.linspace(0.49, 5727.92, Z), + { + "unit_long": "Meters", + "units": "m", + "axis": "Z", + "long_name": "depth", + "standard_name": "depth", + "positive": "down", + }, + ), + "latitude": ( + ["latitude"], + np.linspace(-90, 90, Y), + { + "unit_long": "Degrees North", + "units": "degrees_north", + "axis": "Y", + "long_name": "Latitude", + "standard_name": "latitude", + }, + ), + "longitude": ( + ["longitude"], + np.linspace(-180, 180, X), + { + "unit_long": "Degrees East", + "units": "degrees_east", + "axis": "X", + "long_name": "Longitude", + "standard_name": "longitude", + }, + ), + "time": ( + ["time"], + TIME, + { + "unit_long": "Hours Since 1950-01-01", + "axis": "T", + "long_name": "Time", + "standard_name": "time", + }, + ), }, ), } From 87bb34b2020ff5ed519898648a705207655833b7 Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Fri, 27 Jun 2025 17:10:49 +0200 Subject: [PATCH 05/24] Adding metadata for variables in copernicusmarine --- .../structured/circulation_models.py | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index fadfb9ca70..e608e324cc 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -14,8 
+14,30 @@ "ds_copernicusmarine": xr.Dataset( # Copernicus Marine Service dataset as retrieved by the `copernicusmarine` toolkit { - "uo": (["depth", "latitude", "longitude", "time"], np.random.rand(T, Z, Y, X)), - "vo": (["depth", "latitude", "longitude", "time"], np.random.rand(T, Z, Y, X)), + "uo": ( + ["depth", "latitude", "longitude", "time"], + np.random.rand(T, Z, Y, X), + { + "valid_max": 5.0, + "unit_long": "Meters per second", + "units": "m s-1", + "long_name": "Eastward velocity", + "standard_name": "eastward_sea_water_velocity", + "valid_min": -5.0, + }, + ), + "vo": ( + ["depth", "latitude", "longitude", "time"], + np.random.rand(T, Z, Y, X), + { + "valid_max": 5.0, + "unit_long": "Meters per second", + "units": "m s-1", + "long_name": "Northward velocity", + "standard_name": "northward_sea_water_velocity", + "valid_min": -5.0, + }, + ), }, coords={ "depth": ( From aa014651b7b1449a288e89b1501f1d7de43daf39 Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Tue, 1 Jul 2025 15:13:56 +0200 Subject: [PATCH 06/24] Fixing dimension order in copernicusmarine dataset --- parcels/_datasets/structured/circulation_models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index e608e324cc..dfc60e5cc7 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -15,7 +15,7 @@ # Copernicus Marine Service dataset as retrieved by the `copernicusmarine` toolkit { "uo": ( - ["depth", "latitude", "longitude", "time"], + ["time", "depth", "latitude", "longitude"], np.random.rand(T, Z, Y, X), { "valid_max": 5.0, @@ -27,7 +27,7 @@ }, ), "vo": ( - ["depth", "latitude", "longitude", "time"], + ["time", "depth", "latitude", "longitude"], np.random.rand(T, Z, Y, X), { "valid_max": 5.0, From 1705a3b6d348b26898e472cb16784555f3d6d84a Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Tue, 1 
Jul 2025 15:15:08 +0200 Subject: [PATCH 07/24] Adding MOI U dataset --- .../structured/circulation_models.py | 105 ++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index dfc60e5cc7..75ef5c3948 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -86,4 +86,109 @@ ), }, ), + "ds_NEMO_MOI_U": xr.Dataset( + # NEMO model dataset (U component) as serviced by Mercator Ocean International + { + "vozocrtx": ( + ["deptht", "y", "x"], + np.random.rand(Z, Y, X, dtype="float64"), + { + "units": "m s-1", + "valid_min": -10.0, + "valid_max": 10.0, + "long_name": "Zonal velocity", + "unit_long": "Meters per second", + "standard_name": "sea_water_x_velocity", + "short_name": "vozocrtx", + "online_operation": "N/A", + "interval_operation": 86400, + "interval_write": 86400, + "associate": "time_counter deptht nav_lat nav_lon", + }, + ), + "sotkeavmu1": ( + ["y", "x"], + np.random.rand(Y, X, dtype="float64"), + { + "units": "m2 s-1", + "valid_min": 0.0, + "valid_max": 100.0, + "long_name": "Vertical Eddy Viscosity U 1m", + "standard_name": "ocean_vertical_eddy_viscosity_u_1m", + "short_name": "sotkeavmu1", + "online_operation": "N/A", + "interval_operation": 86400, + "interval_write": 86400, + "associate": "time_counter nav_lat nav_lon", + }, + ), + }, + coords={ + "nav_lon": ( + ["y, x"], + np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), # note that this is not curvilinear + { + "units": "degrees_east", + "valid_min": -179.99984754002182, + "valid_max": 179.999842386314, + "long_name": "Longitude", + "nav_model": "Default grid", + "standard_name": "longitude", + }, + ), + "nav_lat": ( + ["y, x"], + np.tile(np.linspace(-75, 85, Y).reshape(-1, 1), (1, X)), # note that this is not curvilinear + { + "units": "degrees_north", + "valid_min": -77.0104751586914, + "valid_max": 
89.9591064453125, + "long_name": "Latitude", + "nav_model": "Default grid", + "standard_name": "latitude", + }, + ), + "x": ( + ["x"], + np.arange(X, dtype="int32"), + { + "standard_name": "projection_x_coordinate", + "axis": "X", + "units": "1", + }, + ), + "y": ( + ["y"], + np.arange(Y, dtype="int32"), + { + "standard_name": "projection_y_coordinate", + "axis": "Y", + "units": "1", + }, + ), + "time_counter": ( + [], + np.empty(0, dtype="datetime64[ns]"), + { + "standard_name": "time", + "long_name": "Time axis", + "axis": "T", + "time_origin": "1950-JAN-01 00:00:00", + }, + ), + "deptht": ( + ["deptht"], + np.linspace(1, 5500, Z, dtype="float64"), + { + "units": "m", + "positive": "down", + "valid_min": 0.4940253794193268, + "valid_max": 5727.91650390625, + "long_name": "Vertical T levels", + "standard_name": "depth", + "axis": "Z", + }, + ), + }, + ), } From 92cc9947832b7129a446a1e19f22e176f58d72ff Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Tue, 1 Jul 2025 15:17:43 +0200 Subject: [PATCH 08/24] Adding MOI V dataset --- .../structured/circulation_models.py | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index 75ef5c3948..36671104ec 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -191,4 +191,93 @@ ), }, ), + "ds_NEMO_MOI_V": xr.Dataset( + # NEMO model dataset (V component) as serviced by Mercator Ocean International + { + "vomecrty": ( + ["deptht", "y", "x"], + np.random.rand(Z, Y, X, dtype="float64"), + { + "units": "m s-1", + "valid_min": -10.0, + "valid_max": 10.0, + "long_name": "Meridional velocity", + "unit_long": "Meters per second", + "standard_name": "sea_water_y_velocity", + "short_name": "vomecrty", + "online_operation": "N/A", + "interval_operation": 86400, + "interval_write": 86400, + "associate": "time_counter deptht nav_lat nav_lon", + }, 
+ ), + }, + coords={ + "nav_lon": ( + ["y, x"], + np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), # note that this is not curvilinear + { + "units": "degrees_east", + "valid_min": -179.99984754002182, + "valid_max": 179.999842386314, + "long_name": "Longitude", + "nav_model": "Default grid", + "standard_name": "longitude", + }, + ), + "nav_lat": ( + ["y, x"], + np.tile(np.linspace(-75, 85, Y).reshape(-1, 1), (1, X)), # note that this is not curvilinear + { + "units": "degrees_north", + "valid_min": -77.0104751586914, + "valid_max": 89.9591064453125, + "long_name": "Latitude", + "nav_model": "Default grid", + "standard_name": "latitude", + }, + ), + "x": ( + ["x"], + np.arange(X, dtype="int32"), + { + "standard_name": "projection_x_coordinate", + "axis": "X", + "units": "1", + }, + ), + "y": ( + ["y"], + np.arange(Y, dtype="int32"), + { + "standard_name": "projection_y_coordinate", + "axis": "Y", + "units": "1", + }, + ), + "time_counter": ( + [], + np.empty(0, dtype="datetime64[ns]"), + { + "standard_name": "time", + "long_name": "Time axis", + "axis": "T", + "time_origin": "1950-JAN-01 00:00:00", + }, + ), + "deptht": ( + ["deptht"], + np.linspace(1, 5500, Z, dtype="float64"), + { + "units": "m", + "positive": "down", + "valid_min": 0.4940253794193268, + "valid_max": 5727.91650390625, + "long_name": "Vertical T levels", + "standard_name": "depth", + "axis": "Z", + }, + ), + }, + ), } From 0475d6744e4234eda76f6b0918b378497c7cdb8f Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Tue, 1 Jul 2025 15:52:23 +0200 Subject: [PATCH 09/24] Adding CESM dataset --- .../structured/circulation_models.py | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index 36671104ec..24bc9c9dde 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -280,4 +280,87 @@ ), }, ), + 
"ds_CESM": xr.Dataset( + # CESM model dataset + { + "UVEL": ( + ["time", "z_t", "nlat", "nlon"], + np.random.rand(T, Z, Y, X, dtype="float32"), + { + "long_name": "Velocity in grid-x direction", + "units": "centimeter/s", + "grid_loc": 3221, + "cell_methods": "time:mean", + }, + ), + "VVEL": ( + ["time", "z_t", "nlat", "nlon"], + np.random.rand(T, Z, Y, X, dtype="float32"), + { + "long_name": "Velocity in grid-y direction", + "units": "centimeter/s", + "grid_loc": 3221, + "cell_methods": "time:mean", + }, + ), + "WVEL": ( + ["time", "z_w_top", "nlat", "nlon"], + np.random.rand(T, Z, Y, X, dtype="float32"), + { + "long_name": "Vertical Velocity", + "units": "centimeter/s", + "grid_loc": 3112, + "cell_methods": "time:mean", + }, + ), + }, + coords={ + "time": ( + ["time"], + TIME, + { + "long_name": "time", + "bounds": "time_bounds", + }, + ), + "z_t": ( + ["z_t"], + np.linspace(0, 5000, Z, dtype="float32"), + { + "long_name": "depth from surface to midpoint of layer", + "units": "centimeters", + "positive": "down", + "valid_min": 500.0, + "valid_max": 537500.0, + }, + ), + "z_w_top": ( + ["z_w_top"], + np.linspace(0, 5000, Z, dtype="float32"), + { + "long_name": "depth from surface to top of layer", + "units": "centimeters", + "positive": "down", + "valid_min": 0.0, + "valid_max": 525000.94, + }, + ), + "ULONG": ( + ["nlat", "nlon"], + np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), # note that this is not curvilinear + { + "long_name": "array of u-grid longitudes", + "units": "degrees_east", + }, + ), + "ULAT": ( + ["nlat", "nlon"], + np.tile(np.linspace(-75, 85, Y).reshape(-1, 1), (1, X)), # note that this is not curvilinear + { + "long_name": "array of u-grid latitudes", + "units": "degrees_north", + }, + ), + }, + ), } From da27a1417bfcc5481b03b618f21643eba42a6761 Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Wed, 2 Jul 2025 08:28:27 +0200 Subject: [PATCH 10/24] Adding ERA5 wind --- .../structured/circulation_models.py | 46 
+++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index 24bc9c9dde..c350faa406 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -363,4 +363,50 @@ ), }, ), + "ds_ERA5_wind": xr.Dataset( + # ERA5 10m wind model dataset + { + "u10": ( + ["time", "latitude", "longitude"], + np.random.rand(T, Y, X, dtype="float32"), + { + "long_name": "10 metre U wind component", + "units": "m s**-1", + }, + ), + "v10": ( + ["time", "latitude", "longitude"], + np.random.rand(T, Y, X, dtype="float32"), + { + "long_name": "10 metre V wind component", + "units": "m s**-1", + }, + ), + }, + coords={ + "time": ( + ["time"], + TIME, + { + "long_name": "time", + }, + ), + "latitude": ( + ["latitude"], + np.linspace(90, -90, Y), # Note: ERA5 uses latitudes from 90 to -90 + { + "long_name": "latitude", + "units": "degrees_north", + }, + ), + "longitude": ( + ["longitude"], + np.linspace(0, 360, X, endpoint=False), + { + "long_name": "longitude", + "units": "degrees_east", + }, + ), + }, + ), } From 1bb57944580e2f94ebed8c31e5895c417b93ed9f Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Wed, 2 Jul 2025 08:47:13 +0200 Subject: [PATCH 11/24] Adding FES tidal dataset --- .../structured/circulation_models.py | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index c350faa406..948b73e3e3 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -409,4 +409,53 @@ ), }, ), + "ds_FES_tides": xr.Dataset( + # FES tidal model dataset + { + "Ug": ( + ["lat", "lon"], + np.random.rand(Y, X, dtype="float32"), + { + "long_name": "Eastward sea water velocity phaselag due to non equilibrium ocean tide at m2 frequency", + "units": 
"degrees", + "grid_mapping": "crs", + }, + ), + "Ua": ( + ["lat", "lon"], + np.random.rand(Y, X, dtype="float32"), + { + "long_name": "Eastward sea water velocity amplitude due to non equilibrium ocean tide at m2 frequency", + "units": "cm/s", + "grid_mapping": "crs", + }, + ), + }, + coords={ + "lat": ( + ["lat"], + np.linspace(-90, 90, Y), + { + "long_name": "latitude", + "units": "degrees_north", + "bounds": "lat_bnds", + "axis": "Y", + "valid_min": -90.0, + "valid_max": 90.0, + }, + ), + "lon": ( + ["lon"], + np.linspace(0, 360, X, endpoint=False), + { + "long_name": "longitude", + "units": "degrees_east", + "bounds": "lon_bnds", + "axis": "X", + "valid_min": 0.0, + "valid_max": 360.0, + }, + ), + }, + ), } From 8ae28d77c94711dc68d75d711b75409b9a82238f Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Wed, 2 Jul 2025 09:00:16 +0200 Subject: [PATCH 12/24] Adding Hycom dataset --- .../structured/circulation_models.py | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index 948b73e3e3..8dbb01d340 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -458,4 +458,83 @@ ), }, ), + "ds_hycom_espc": xr.Dataset( + # HYCOM ESPC model dataset from https://data.hycom.org/datasets/ESPC-D-V02/data/daily_netcdf/2025/ + { + "water_u": ( + ["time", "depth", "lat", "lon"], + np.random.rand(T, Z, Y, X, dtype="float32"), + { + "long_name": "Eastward Water Velocity", + "standard_name": "eastward_sea_water_velocity", + "units": "m/s", + "NAVO_code": 17, + "actual_range": [-3.3700001, 3.6840003], + "cell_methods": "time: mean", + }, + ), + "tau": ( + ["time"], + np.arange(0, 24, T, dtype="float64"), + { + "long_name": "Tau", + "units": "hours since analysis", + "time_origin": "2024-12-31 12:00:00", + "NAVO_code": 56, + "cell_methods": "time: mean", + }, + ), + }, + coords={ + "time": ( + 
["time"], + np.arange(0, T, dtype="float64"), + { + "long_name": "Valid Time", + "units": "hours since 2000-01-01 00:00:00", + "time_origin": "2000-01-01 00:00:00", + "calendar": "standard", + "axis": "T", + "NAVO_code": 13, + "cell_methods": "time: mean", + }, + ), + "depth": ( + ["depth"], + np.linspace(0, 5000, Z, dtype="float32"), + { + "long_name": "Depth", + "standard_name": "depth", + "units": "m", + "positive": "down", + "axis": "Z", + "NAVO_code": 5, + }, + ), + "lat": ( + ["lat"], + np.linspace(-80, 90, Y), + { + "long_name": "Latitude", + "standard_name": "latitude", + "units": "degrees_north", + "point_spacing": "even", + "axis": "Y", + "NAVO_code": 1, + }, + ), + "lon": ( + ["lon"], + np.linspace(0, 360, X, endpoint=False), + { + "long_name": "Longitude", + "standard_name": "longitude", + "units": "degrees_east", + "modulo": "360 degrees", + "axis": "X", + "NAVO_code": 2, + }, + ), + }, + ), } From 4b9aea7b9d8630400eaa82dd2bbcb8910195f0d4 Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Wed, 2 Jul 2025 11:43:01 +0200 Subject: [PATCH 13/24] Adding CROCO idealised simulations setup --- .../structured/circulation_models.py | 169 ++++++++++++++++++ 1 file changed, 169 insertions(+) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index 8dbb01d340..c43a79b443 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -537,4 +537,173 @@ ), }, ), + "ds_CROCO_idealized": xr.Dataset( + # CROCO idealized model dataset + { + "u": ( + ["time", "s_rho", "eta_rho", "xi_u"], + np.random.rand(T, Z, Y, X - 1, dtype="float32"), + { + "long_name": "u-momentum component", + "units": "meter second-1", + "field": "u-velocity, scalar, series", + "standard_name": "sea_water_x_velocity_at_u_location", + }, + ), + "v": ( + ["time", "s_rho", "eta_v", "xi_rho"], + np.random.rand(T, Z, Y - 1, X, dtype="float32"), + { + "long_name": "v-momentum 
component", + "units": "meter second-1", + "field": "v-velocity, scalar, series", + "standard_name": "sea_water_y_velocity_at_v_location", + }, + ), + "w": ( + ["time", "s_rho", "eta_rho", "xi_rho"], + np.random.rand(T, Z, Y, X, dtype="float32"), + { + "long_name": "vertical momentum component", + "units": "meter second-1", + "field": "w-velocity, scalar, series", + "standard_name": "upward_sea_water_velocity", + "coordinates": "lat_rho lon_rho", + }, + ), + "h": ( + ["eta_rho", "xi_rho"], + np.random.rand(Y, X, dtype="float32"), + { + "long_name": "bathymetry at RHO-points", + "units": "meter", + "field": "bath, scalar", + "standard_name": "model_sea_floor_depth_below_geoid", + }, + ), + "zeta": ( + ["time", "eta_rho", "xi_rho"], + np.random.rand(T, Y, X, dtype="float32"), + { + "long_name": "free-surface", + "units": "meter", + "field": "free_surface, scalar, series", + "standard_name": "sea_surface_height", + }, + ), + "Cs_w": ( + ["s_w"], + np.random.rand(Z + 1, dtype="float32"), + { + "long_name": "S-coordinate stretching curves at W-points", + }, + ), + "hc": ( + [], + np.array(0.0, dtype="float32"), + { + "long_name": "S-coordinate parameter, critical depth", + "units": "meter", + }, + ), + }, + coords={ + "time": ( + ["time"], + np.arange(0, T, dtype="float64"), + { + "long_name": "time since initialization", + "units": "second", + "field": "time, scalar, series", + "standard_name": "time", + "axis": "T", + }, + ), + "s_rho": ( + ["s_rho"], + np.linspace(-0.95, 0.05, Z, dtype="float32"), + { + "long_name": "S-coordinate at RHO-points", + "standard_name": "ocean_s_coordinate_g1", + "positive": "up", + "axis": "Z", + "formula_terms": "s: sc_r C: Cs_r eta: zeta depth: h depth_c: hc", + }, + ), + "s_w": ( + ["s_w"], + np.linspace(-1, 0, Z + 1, dtype="float32"), + { + "long_name": "S-coordinate at W-points", + "standard_name": "ocean_s_coordinate_g1_at_w_location", + "positive": "up", + "axis": "Z", + "c_grid_axis_shift": -0.5, + "formula_terms": "s: sc_w C: 
Cs_w eta: zeta depth: h depth_c: hc", + }, + ), + "eta_rho": ( + ["eta_rho"], + np.arange(Y, dtype="float32"), + { + "long name": "y-dimension of the grid", + "standard_name": "y_grid_index", + "axis": "Y", + "c_grid_dynamic_range": f"2:{Y}", + }, + ), + "eta_v": ( + ["eta_v"], + np.arange(Y - 1, dtype="float32"), + { + "long name": "y-dimension of the grid at v location", + "standard_name": "y_grid_index_at_v_location", + "axis": "Y", + "c_grid_axis_shift": 0.5, + "c_grid_dynamic_range": f"2:{Y-1}", + }, + ), + "xi_rho": ( + ["xi_rho"], + np.arange(X, dtype="float32"), + { + "long name": "x-dimension of the grid", + "standard_name": "x_grid_index", + "axis": "X", + "c_grid_dynamic_range": f"2:{X}", + }, + ), + "xi_u": ( + ["xi_u"], + np.arange(X - 1, dtype="float32"), + { + "long name": "x-dimension of the grid at u location", + "standard_name": "x_grid_index_at_u_location", + "axis": "X", + "c_grid_axis_shift": 0.5, + "c_grid_dynamic_range": f"2:{X-1}", + }, + ), + "x_rho": ( + ["eta_rho", "xi_rho"], + np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), # note that this is not curvilinear + { + "long_name": "x-locations of RHO-points", + "units": "meter", + "standard_name": "plane_x_coordinate", + "field": "x_rho, scalar", + }, + ), + "y_rho": ( + ["eta_rho", "xi_rho"], + np.tile(np.linspace(-89, 89, Y), (X, 1)).T, # note that this is not curvilinear + { + "long_name": "y-locations of RHO-points", + "units": "meter", + "standard_name": "plane_y_coordinate", + "field": "y_rho, scalar", + }, + ), + }, + ), } From a45ab954e13d4296f4851590210777ba55ad996e Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Wed, 2 Jul 2025 15:04:14 +0200 Subject: [PATCH 14/24] Adding GlobCurrent dataset (from copernicusmarine) --- .../structured/circulation_models.py | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index c43a79b443..8ed41a4467 100644 
--- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -86,6 +86,75 @@ ), }, ), + "ds_copernicusmarine_globcurrents": xr.Dataset( + # Copernicus Marine Service GlobCurrent dataset (MULTIOBS_GLO_PHY_MYNRT_015_003) + { + "ue": ( + ["time", "depth", "latitude", "longitude"], + np.random.rand(T, Z, Y, X), + { + "units": "m/s", + "standard_name": "eastward_sea_water_velocity_due_to_ekman_drift", + "long_name": "Depth Ekman driven velocity : zonal component", + "grid_mapping": "crs", + }, + ), + "ve": ( + ["time", "depth", "latitude", "longitude"], + np.random.rand(T, Z, Y, X), + { + "units": "m/s", + "standard_name": "northward_sea_water_velocity_due_to_ekman_drift", + "long_name": "Depth Ekman driven velocity : meridional component", + "grid_mapping": "crs", + }, + ), + }, + coords={ + "depth": ( + ["depth"], + np.linspace(-0.0, 15, Z), + { + "standard_name": "depth", + "long_name": "Depth", + "units": "m", + "axis": "Z", + "positive": "down", + }, + ), + "latitude": ( + ["latitude"], + np.linspace(-90, 90, Y), + { + "unit_long": "Degrees North", + "units": "degrees_north", + "axis": "Y", + "long_name": "Latitude", + "standard_name": "latitude", + }, + ), + "longitude": ( + ["longitude"], + np.linspace(-180, 180, X), + { + "unit_long": "Degrees East", + "units": "degrees_east", + "axis": "X", + "long_name": "Longitude", + "standard_name": "longitude", + }, + ), + "time": ( + ["time"], + TIME, + { + "axis": "T", + "long_name": "Time", + "standard_name": "time", + }, + ), + }, + ), "ds_NEMO_MOI_U": xr.Dataset( # NEMO model dataset (U component) as serviced by Mercator Ocean International { From 6fbd0ceaed623129a536e9461305aa9fe86cb32e Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Thu, 3 Jul 2025 08:43:02 +0200 Subject: [PATCH 15/24] Adding ECCO4 dataset --- .../structured/circulation_models.py | 222 ++++++++++++++++++ 1 file changed, 222 insertions(+) diff --git 
a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index 8ed41a4467..fe9a562a6e 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -606,6 +606,228 @@ ), }, ), + "ds_ecco4": xr.Dataset( + # ECCO V4r4 model dataset (from https://podaac.jpl.nasa.gov/dataset/ECCO_L4_OCEAN_VEL_LLC0090GRID_DAILY_V4R4#capability-modal-download) + { + "UVEL": ( + ["time", "k", "tile", "j", "i_g"], + np.random.rand(T, Z, 13, Y, X, dtype="float32"), + { + "long_name": "Horizontal velocity in the model +x direction", + "units": "m s-1", + "mate": "VVEL", + "coverage_content_type": "modelResult", + "direction": ">0 increases volume", + "standard_name": "sea_water_x_velocity", + "comment": "Horizontal velocity in the +x direction at the 'u' face of the tracer cell on the native model grid. Note: in the Arakawa-C grid, horizontal velocities are staggered relative to the tracer cells with indexing such that +UVEL(i_g,j,k) corresponds to +x fluxes through the 'u' face of the tracer cell at (i,j,k). Do NOT use UVEL for volume flux calculations because the model's grid cell thicknesses vary with time (z* coordinates); use UVELMASS instead. Also, the model +x direction does not necessarily correspond to the geographical east-west direction because the x and y axes of the model's curvilinear lat-lon-cap (llc) grid have arbitrary orientations which vary within and across tiles. 
See EVEL and NVEL for zonal and meridional velocity.", + "valid_min": -2.139253616333008, + "valid_max": 2.038635015487671, + }, + ), + "VVEL": ( + ["time", "k", "tile", "j_g", "i"], + np.random.rand(T, Z, 13, Y, X, dtype="float32"), + { + "long_name": "Horizontal velocity in the model +y direction", + "units": "m s-1", + "mate": "UVEL", + "coverage_content_type": "modelResult", + "direction": ">0 increases volume", + "standard_name": "sea_water_y_velocity", + "comment": "Horizontal velocity in the +y direction at the 'v' face of the tracer cell on the native model grid. Note: in the Arakawa-C grid, horizontal velocities are staggered relative to the tracer cells with indexing such that +VVEL(i,j_g,k) corresponds to +y fluxes through the 'v' face of the tracer cell at (i,j,k). Do NOT use VVEL for volume flux calculations because the model's grid cell thicknesses vary with time (z* coordinates); use VVELMASS instead. Also, the model +y direction does not necessarily correspond to the geographical north-south direction because the x and y axes of the model's curvilinear lat-lon-cap (llc) grid have arbitrary orientations which vary within and across tiles. See EVEL and NVEL for zonal and meridional velocity.", + "valid_min": -1.7877743244171143, + "valid_max": 1.9089667797088623, + }, + ), + "WVEL": ( + ["time", "k_l", "tile", "j", "i"], + np.random.rand(T, Z, 13, Y, X, dtype="float32"), + { + "long_name": "Vertical velocity", + "units": "m s-1", + "coverage_content_type": "modelResult", + "direction": ">0 decreases volume", + "standard_name": "upward_sea_water_velocity", + "comment": "Vertical velocity in the +z direction at the top 'w' face of the tracer cell on the native model grid. Note: in the Arakawa-C grid, vertical velocities are staggered relative to the tracer cells with indexing such that +WVEL(i,j,k_l) corresponds to upward +z motion through the top 'w' face of the tracer cell at (i,j,k). 
WVEL is identical to WVELMASS.", + "valid_min": -0.0023150660563260317, + "valid_max": 0.0016380994347855449, + }, + ), + }, + coords={ + "time": ( + ["time"], + TIME, + { + "long_name": "center time of averaging period", + "standard_name": "time", + "axis": "T", + "bounds": "time_bnds", + "coverage_content_type": "coordinate", + }, + ), + "tile": ( + ["tile"], + np.arange(13, dtype="int32"), + { + "long_name": "lat-lon-cap tile index", + "coverage_content_type": "coordinate", + "comment": "The ECCO V4 horizontal model grid is divided into 13 tiles of 90x90 cells for convenience.", + }, + ), + "k": ( + ["k"], + np.arange(Z, dtype="int32"), + { + "long_name": "grid index in z for tracer variables", + "axis": "Z", + "swap_dim": "Z", + "coverage_content_type": "coordinate", + }, + ), + "k_l": ( + ["k_l"], + np.arange(Z, dtype="int32"), + { + "long_name": "grid index in z corresponding to the top face of tracer grid cells ('w' locations)", + "axis": "Z", + "swap_dim": "Zl", + "coverage_content_type": "coordinate", + "c_grid_axis_shift": -0.5, + "comment": "First index corresponds to the top surface of the uppermost tracer grid cell. 
The use of 'l' in the variable name follows the MITgcm convention for ocean variables in which the lower (l) face of a tracer grid cell on the logical grid corresponds to the top face of the grid cell on the physical grid.", + }, + ), + "j": ( + ["j"], + np.arange(Y, dtype="int32"), + { + "long_name": "grid index in y for variables at tracer and 'u' locations", + "axis": "Y", + "swap_dim": "YC", + "coverage_content_type": "coordinate", + "comment": "In the Arakawa C-grid system, tracer (e.g., THETA) and 'u' variables (e.g., UVEL) have the same y coordinate on the model grid.", + }, + ), + "j_g": ( + ["j_g"], + np.arange(Y, dtype="int32"), + { + "long_name": "grid index in y for variables at 'v' and 'g' locations", + "axis": "Y", + "swap_dim": "YG", + "c_grid_axis_shift": -0.5, + "coverage_content_type": "coordinate", + "comment": "In the Arakawa C-grid system, 'v' (e.g., VVEL) and 'g' variables (e.g., XG) have the same y coordinate.", + }, + ), + "i": ( + ["i"], + np.arange(X, dtype="int32"), + { + "long_name": "grid index in x for variables at tracer and 'v' locations", + "axis": "X", + "swap_dim": "XC", + "coverage_content_type": "coordinate", + "comment": "In the Arakawa C-grid system, tracer (e.g., THETA) and 'v' variables (e.g., VVEL) have the same x coordinate on the model grid.", + }, + ), + "i_g": ( + ["i_g"], + np.arange(X, dtype="int32"), + { + "long_name": "grid index in x for variables at 'u' and 'g' locations", + "axis": "X", + "swap_dim": "XG", + "c_grid_axis_shift": -0.5, + "coverage_content_type": "coordinate", + "comment": "In the Arakawa C-grid system, 'u' (e.g., UVEL) and 'g' variables (e.g., XG) have the same x coordinate on the model grid.", + }, + ), + "Z": ( + ["k"], + np.linspace(-5, -5900, Z, dtype="float32"), + { + "long_name": "depth of tracer grid cell center", + "standard_name": "depth", + "units": "m", + "positive": "up", + "bounds": "Z_bnds", + "coverage_content_type": "coordinate", + "comment": "Non-uniform vertical spacing.", + }, + 
), + "Zl": ( + ["k_l"], + np.linspace(0, -5678, Z, dtype="float32"), + { + "long_name": "depth of the top face of tracer grid cells", + "standard_name": "depth", + "units": "m", + "positive": "up", + "coverage_content_type": "coordinate", + "comment": "First element is 0m, the depth of the top face of the first tracer grid cell (ocean surface). Last element is the depth of the top face of the deepest grid cell. The use of 'l' in the variable name follows the MITgcm convention for ocean variables in which the lower (l) face of a tracer grid cell on the logical grid corresponds to the top face of the grid cell on the physical grid. In other words, the logical vertical grid of MITgcm ocean variables is inverted relative to the physical vertical grid.", + }, + ), + "YC": ( + ["tile", "j", "i"], + np.tile( + np.tile(np.linspace(-89, 89, Y), (X, 1)).T, (13, 1, 1) + ), # NOTE this grid is not correct, as duplicates for each tile + { + "long_name": "latitude of tracer grid cell center", + "standard_name": "latitude", + "units": "degrees_north", + "coordinate": "YC XC", + "bounds": "YC_bnds", + "coverage_content_type": "coordinate", + "comment": "nonuniform grid spacing", + }, + ), + "YG": ( + ["tile", "j_g", "i_g"], + np.tile( + np.tile(np.linspace(-89, 89, Y), (X, 1)).T, (13, 1, 1) + ), # NOTE this grid is not correct, as duplicates for each tile + { + "long_name": "latitude of 'southwest' corner of tracer grid cell", + "standard_name": "latitude", + "units": "degrees_north", + "coordinate": "YG XG", + "coverage_content_type": "coordinate", + "comment": "Nonuniform grid spacing. Note: 'southwest' does not correspond to geographic orientation but is used for convenience to describe the computational grid. 
See MITgcm documentation for details.", + }, + ), + "XC": ( + ["tile", "j", "i"], + np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)).reshape( + 13, Y, X + ), # NOTE this grid is not correct, as duplicates for each tile + { + "long_name": "longitude of tracer grid cell center", + "standard_name": "longitude", + "units": "degrees_east", + "coordinate": "YC XC", + "bounds": "XC_bnds", + "coverage_content_type": "coordinate", + "comment": "nonuniform grid spacing", + }, + ), + "XG": ( + ["tile", "j_g", "i_g"], + np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)).reshape( + 13, Y, X + ), # NOTE this grid is not correct, as duplicates for each tile + { + "long_name": "longitude of 'southwest' corner of tracer grid cell", + "standard_name": "longitude", + "units": "degrees_east", + "coordinate": "YG XG", + "coverage_content_type": "coordinate", + "comment": "Nonuniform grid spacing. Note: 'southwest' does not correspond to geographic orientation but is used for convenience to describe the computational grid. 
See MITgcm documentation for details.", + }, + ), + }, + ), "ds_CROCO_idealized": xr.Dataset( # CROCO idealized model dataset { From 5c17b81089c4c6cff473456e1dd64f2440a2ea46 Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Thu, 3 Jul 2025 10:34:16 +0200 Subject: [PATCH 16/24] Adding MITgcm netcdf dataset --- .../structured/circulation_models.py | 98 +++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index fe9a562a6e..47352ea619 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -432,6 +432,104 @@ ), }, ), + "ds_MITgcm_netcdf": xr.Dataset( + # MITgcm model dataset in netCDF format + { + "U": ( + ["T", "Z", "Y", "Xp1"], + np.random.rand(T, Z, Y, X + 1, dtype="float32"), + { + "units": "m/s", + "coordinates": "XU YU RC iter", + }, + ), + "V": ( + ["T", "Z", "Yp1", "X"], + np.random.rand(T, Z, Y + 1, X, dtype="float32"), + { + "units": "m/s", + "coordinates": "XV YV RC iter", + }, + ), + "W": ( + ["T", "Zl", "Y", "X"], + np.random.rand(T, Z, Y, X, dtype="float32"), + { + "units": "m/s", + "coordinates": "XC YC RC iter", + }, + ), + "Temp": ( + ["T", "Z", "Y", "X"], + np.random.rand(T, Z, Y, X, dtype="float32"), + { + "units": "degC", + "coordinates": "XC YC RC iter", + "long_name": "potential_temperature", + }, + ), + }, + coords={ + "T": ( + ["T"], + np.arange(0, T, dtype="float64"), + { + "long_name": "model_time", + "units": "s", + }, + ), + "Z": ( + ["Z"], + np.linspace(-25, -5000, Z, dtype="float64"), + { + "long_name": "vertical coordinate of cell center", + "units": "meters", + "positive": "up", + }, + ), + "Zl": ( + ["Zl"], + np.linspace(0, -4500, Z, dtype="float64"), + { + "long_name": "vertical coordinate of upper cell interface", + "units": "meters", + "positive": "up", + }, + ), + "Y": ( + ["Y"], + np.linspace(500, 5000, Y, dtype="float64"), + { + "long_name": 
"Y-Coordinate of cell center", + "units": "meters", + }, + ), + "Yp1": ( + ["Yp1"], + np.linspace(0, 4500, Y + 1, dtype="float64"), + { + "long_name": "Y-Coordinate of cell corner", + "units": "meters", + }, + ), + "X": ( + ["X"], + np.linspace(500, 5000, X, dtype="float64"), + { + "long_name": "X-coordinate of cell center", + "units": "meters", + }, + ), + "Xp1": ( + ["Xp1"], + np.linspace(0, 4100, X + 1, dtype="float64"), + { + "long_name": "X-Coordinate of cell corner", + "units": "meters", + }, + ), + }, + ), "ds_ERA5_wind": xr.Dataset( # ERA5 10m wind model dataset { From f1b0964b8b540e6b47c5b6125eed1569e7c01d07 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 3 Jul 2025 12:23:13 +0200 Subject: [PATCH 17/24] Move circulation model data definitions to own functions --- .../structured/circulation_models.py | 279 ++++++++++-------- 1 file changed, 163 insertions(+), 116 deletions(-) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index 47352ea619..a8c3d0b10d 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -10,9 +10,9 @@ TIME = xr.date_range("2000", "2001", T) -datasets = { - "ds_copernicusmarine": xr.Dataset( - # Copernicus Marine Service dataset as retrieved by the `copernicusmarine` toolkit +def _copernicusmarine(): + """Copernicus Marine Service dataset as retrieved by the `copernicusmarine` toolkit""" + return xr.Dataset( { "uo": ( ["time", "depth", "latitude", "longitude"], @@ -85,9 +85,12 @@ }, ), }, - ), - "ds_copernicusmarine_globcurrents": xr.Dataset( - # Copernicus Marine Service GlobCurrent dataset (MULTIOBS_GLO_PHY_MYNRT_015_003) + ) + + +def _copernicusmarine_globcurrents(): + """Copernicus Marine Service GlobCurrent dataset (MULTIOBS_GLO_PHY_MYNRT_015_003)""" + return xr.Dataset( { "ue": ( ["time", "depth", "latitude", "longitude"], @@ -154,9 +157,12 @@ }, ), 
}, - ), - "ds_NEMO_MOI_U": xr.Dataset( - # NEMO model dataset (U component) as serviced by Mercator Ocean International + ) + + +def _NEMO_MOI_U(): + """NEMO model dataset (U component) as serviced by Mercator Ocean International""" + return xr.Dataset( { "vozocrtx": ( ["deptht", "y", "x"], @@ -259,9 +265,12 @@ }, ), }, - ), - "ds_NEMO_MOI_V": xr.Dataset( - # NEMO model dataset (V component) as serviced by Mercator Ocean International + ) + + +def _NEMO_MOI_V(): + """NEMO model dataset (V component) as serviced by Mercator Ocean International""" + return xr.Dataset( { "vomecrty": ( ["deptht", "y", "x"], @@ -348,92 +357,101 @@ }, ), }, - ), - "ds_CESM": xr.Dataset( - # CESM model dataset - { - "UVEL": ( - ["time", "z_t", "nlat", "nlon"], - np.random.rand(T, Z, Y, X, dtype="float32"), - { - "long_name": "Velocity in grid-x direction", - "units": "centimeter/s", - "grid_loc": 3221, - "cell_methods": "time:mean", - }, - ), - "VVEL": ( - ["time", "z_t", "nlat", "nlon"], - np.random.rand(T, Z, Y, X, dtype="float32"), - { - "long_name": "Velocity in grid-y direction", - "units": "centimeter/s", - "grid_loc": 3221, - "cell_methods": "time:mean", - }, - ), - "WVEL": ( - ["time", "z_w_top", "nlat", "nlon"], - np.random.rand(T, Z, Y, X, dtype="float32"), - { - "long_name": "Vertical Velocity", - "units": "centimeter/s", - "grid_loc": 3112, - "cell_methods": "time:mean", - }, - ), - }, - coords={ - "time": ( - ["time"], - TIME, - { - "long_name": "time", - "bounds": "time_bounds", - }, - ), - "z_t": ( - ["z_t"], - np.linspace(0, 5000, Z, dtype="float32"), - { - "long_name": "depth from surface to midpoint of layer", - "units": "centimeters", - "positive": "down", - "valid_min": 500.0, - "valid_max": 537500.0, - }, - ), - "z_w_top": ( - ["z_w_top"], - np.linspace(0, 5000, Z, dtype="float32"), - { - "long_name": "depth from surface to top of layer", - "units": "centimeters", - "positive": "down", - "valid_min": 0.0, - "valid_max": 525000.94, - }, - ), - "ULONG": ( - ["nlat", 
"nlon"], - np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), # note that this is not curvilinear - { - "long_name": "array of u-grid longitudes", - "units": "degrees_east", - }, - ), - "ULAT": ( - ["nlat", "nlon"], - np.tile(np.linspace(-75, 85, Y).reshape(-1, 1), (1, X)), # note that this is not curvilinear - { - "long_name": "array of u-grid latitudes", - "units": "degrees_north", - }, - ), - }, - ), - "ds_MITgcm_netcdf": xr.Dataset( - # MITgcm model dataset in netCDF format + ) + + +def _CESM(): + """CESM model dataset""" + return ( + xr.Dataset( + { + "UVEL": ( + ["time", "z_t", "nlat", "nlon"], + np.random.rand(T, Z, Y, X, dtype="float32"), + { + "long_name": "Velocity in grid-x direction", + "units": "centimeter/s", + "grid_loc": 3221, + "cell_methods": "time:mean", + }, + ), + "VVEL": ( + ["time", "z_t", "nlat", "nlon"], + np.random.rand(T, Z, Y, X, dtype="float32"), + { + "long_name": "Velocity in grid-y direction", + "units": "centimeter/s", + "grid_loc": 3221, + "cell_methods": "time:mean", + }, + ), + "WVEL": ( + ["time", "z_w_top", "nlat", "nlon"], + np.random.rand(T, Z, Y, X, dtype="float32"), + { + "long_name": "Vertical Velocity", + "units": "centimeter/s", + "grid_loc": 3112, + "cell_methods": "time:mean", + }, + ), + }, + coords={ + "time": ( + ["time"], + TIME, + { + "long_name": "time", + "bounds": "time_bounds", + }, + ), + "z_t": ( + ["z_t"], + np.linspace(0, 5000, Z, dtype="float32"), + { + "long_name": "depth from surface to midpoint of layer", + "units": "centimeters", + "positive": "down", + "valid_min": 500.0, + "valid_max": 537500.0, + }, + ), + "z_w_top": ( + ["z_w_top"], + np.linspace(0, 5000, Z, dtype="float32"), + { + "long_name": "depth from surface to top of layer", + "units": "centimeters", + "positive": "down", + "valid_min": 0.0, + "valid_max": 525000.94, + }, + ), + "ULONG": ( + ["nlat", "nlon"], + np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), # note that this is not curvilinear + { + "long_name": "array 
of u-grid longitudes", + "units": "degrees_east", + }, + ), + "ULAT": ( + ["nlat", "nlon"], + np.tile(np.linspace(-75, 85, Y).reshape(-1, 1), (1, X)), # note that this is not curvilinear + { + "long_name": "array of u-grid latitudes", + "units": "degrees_north", + }, + ), + }, + ), + ) + + +def _MITgcm_netcdf(): + """MITgcm model dataset in netCDF format""" + return xr.Dataset( + # { "U": ( ["T", "Z", "Y", "Xp1"], @@ -529,9 +547,12 @@ }, ), }, - ), - "ds_ERA5_wind": xr.Dataset( - # ERA5 10m wind model dataset + ) + + +def _ERA5_wind(): + """ERA5 10m wind model dataset""" + return xr.Dataset( { "u10": ( ["time", "latitude", "longitude"], @@ -575,9 +596,12 @@ }, ), }, - ), - "ds_FES_tides": xr.Dataset( - # FES tidal model dataset + ) + + +def _FES_tides(): + """FES tidal model dataset""" + return xr.Dataset( { "Ug": ( ["lat", "lon"], @@ -624,9 +648,12 @@ }, ), }, - ), - "ds_hycom_espc": xr.Dataset( - # HYCOM ESPC model dataset from https://data.hycom.org/datasets/ESPC-D-V02/data/daily_netcdf/2025/ + ) + + +def _hycom_espc(): + """HYCOM ESPC model dataset from https://data.hycom.org/datasets/ESPC-D-V02/data/daily_netcdf/2025/""" + return xr.Dataset( { "water_u": ( ["time", "depth", "lat", "lon"], @@ -703,9 +730,12 @@ }, ), }, - ), - "ds_ecco4": xr.Dataset( - # ECCO V4r4 model dataset (from https://podaac.jpl.nasa.gov/dataset/ECCO_L4_OCEAN_VEL_LLC0090GRID_DAILY_V4R4#capability-modal-download) + ) + + +def _ecco4(): + """ECCO V4r4 model dataset (from https://podaac.jpl.nasa.gov/dataset/ECCO_L4_OCEAN_VEL_LLC0090GRID_DAILY_V4R4#capability-modal-download)""" + return xr.Dataset( { "UVEL": ( ["time", "k", "tile", "j", "i_g"], @@ -925,9 +955,12 @@ }, ), }, - ), - "ds_CROCO_idealized": xr.Dataset( - # CROCO idealized model dataset + ) + + +def _CROCO_idealized(): + """CROCO idealized model dataset""" + return xr.Dataset( { "u": ( ["time", "s_rho", "eta_rho", "xi_u"], @@ -1049,7 +1082,7 @@ "standard_name": "y_grid_index_at_v_location", "axis": "Y", "c_grid_axis_shift": 0.5, - 
"c_grid_dynamic_range": f"2:{Y-1}", + "c_grid_dynamic_range": f"2:{Y - 1}", }, ), "xi_rho": ( @@ -1070,7 +1103,7 @@ "standard_name": "x_grid_index_at_u_location", "axis": "X", "c_grid_axis_shift": 0.5, - "c_grid_dynamic_range": f"2:{X-1}", + "c_grid_dynamic_range": f"2:{X - 1}", }, ), "x_rho": ( @@ -1094,5 +1127,19 @@ }, ), }, - ), + ) + + +datasets = { + "ds_copernicusmarine": _copernicusmarine(), + "ds_copernicusmarine_globcurrents": _copernicusmarine_globcurrents(), + "ds_NEMO_MOI_U": _NEMO_MOI_U(), + "ds_NEMO_MOI_V": _NEMO_MOI_V(), + "ds_CESM": _CESM(), + "ds_MITgcm_netcdf": _MITgcm_netcdf(), + "ds_ERA5_wind": _ERA5_wind(), + "ds_FES_tides": _FES_tides(), + "ds_hycom_espc": _hycom_espc(), + "ds_ecco4": _ecco4(), + "ds_CROCO_idealized": _CROCO_idealized(), } From f180f9eada858fb7516bdb39b7a27e725288bbbc Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 3 Jul 2025 12:26:05 +0200 Subject: [PATCH 18/24] Remove dtype arg It is an invalid argumnet to the rand() function --- parcels/_datasets/structured/circulation_models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index a8c3d0b10d..b84a461080 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -166,7 +166,7 @@ def _NEMO_MOI_U(): { "vozocrtx": ( ["deptht", "y", "x"], - np.random.rand(Z, Y, X, dtype="float64"), + np.random.rand(Z, Y, X), { "units": "m s-1", "valid_min": -10.0, @@ -183,7 +183,7 @@ def _NEMO_MOI_U(): ), "sotkeavmu1": ( ["y", "x"], - np.random.rand(Y, X, dtype="float64"), + np.random.rand(Y, X), { "units": "m2 s-1", "valid_min": 0.0, @@ -274,7 +274,7 @@ def _NEMO_MOI_V(): { "vomecrty": ( ["deptht", "y", "x"], - np.random.rand(Z, Y, X, dtype="float64"), + np.random.rand(Z, Y, X), { "units": "m s-1", "valid_min": -10.0, From 
aeff736c30a45ecfc98d63459cda6b006c89e214 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 3 Jul 2025 12:32:50 +0200 Subject: [PATCH 19/24] Add import of circulation_models in test suite --- tests/v4/test_fieldset.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/v4/test_fieldset.py b/tests/v4/test_fieldset.py index be5fc04a41..f623b7f1c4 100644 --- a/tests/v4/test_fieldset.py +++ b/tests/v4/test_fieldset.py @@ -6,6 +6,9 @@ import xarray as xr from parcels import xgcm +from parcels._datasets.structured.circulation_models import ( + datasets as datasets_circulation_models, # noqa: F401 +) # just making sure the import works. Will eventually be used in tests from parcels._datasets.structured.generic import T as T_structured from parcels._datasets.structured.generic import datasets as datasets_structured from parcels.field import Field, VectorField From d604d469471247a522bc2b21871caf1cad891926 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 3 Jul 2025 12:44:44 +0200 Subject: [PATCH 20/24] Fix xarray dim issues and invalid rand dtype param --- .../structured/circulation_models.py | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index b84a461080..326f848357 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -200,7 +200,7 @@ def _NEMO_MOI_U(): }, coords={ "nav_lon": ( - ["y, x"], + ["y", "x"], np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), # note that this is not curvilinear { "units": "degrees_east", @@ -212,7 +212,7 @@ def _NEMO_MOI_U(): }, ), "nav_lat": ( - ["y, x"], + ["y", "x"], np.tile(np.linspace(-75, 85, Y).reshape(-1, 1), (1, X)), # note that this is not curvilinear { "units": "degrees_north", @@ -242,7 +242,7 @@ def 
_NEMO_MOI_U(): }, ), "time_counter": ( - [], + ["time_counter"], np.empty(0, dtype="datetime64[ns]"), { "standard_name": "time", @@ -292,7 +292,7 @@ def _NEMO_MOI_V(): }, coords={ "nav_lon": ( - ["y, x"], + ["y", "x"], np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), # note that this is not curvilinear { "units": "degrees_east", @@ -304,7 +304,7 @@ def _NEMO_MOI_V(): }, ), "nav_lat": ( - ["y, x"], + ["y", "x"], np.tile(np.linspace(-75, 85, Y).reshape(-1, 1), (1, X)), # note that this is not curvilinear { "units": "degrees_north", @@ -334,7 +334,7 @@ def _NEMO_MOI_V(): }, ), "time_counter": ( - [], + ["time_counter"], np.empty(0, dtype="datetime64[ns]"), { "standard_name": "time", @@ -367,7 +367,7 @@ def _CESM(): { "UVEL": ( ["time", "z_t", "nlat", "nlon"], - np.random.rand(T, Z, Y, X, dtype="float32"), + np.random.rand(T, Z, Y, X).astype("float32"), { "long_name": "Velocity in grid-x direction", "units": "centimeter/s", @@ -377,7 +377,7 @@ def _CESM(): ), "VVEL": ( ["time", "z_t", "nlat", "nlon"], - np.random.rand(T, Z, Y, X, dtype="float32"), + np.random.rand(T, Z, Y, X).astype("float32"), { "long_name": "Velocity in grid-y direction", "units": "centimeter/s", @@ -387,7 +387,7 @@ def _CESM(): ), "WVEL": ( ["time", "z_w_top", "nlat", "nlon"], - np.random.rand(T, Z, Y, X, dtype="float32"), + np.random.rand(T, Z, Y, X).astype("float32"), { "long_name": "Vertical Velocity", "units": "centimeter/s", @@ -455,7 +455,7 @@ def _MITgcm_netcdf(): { "U": ( ["T", "Z", "Y", "Xp1"], - np.random.rand(T, Z, Y, X + 1, dtype="float32"), + np.random.rand(T, Z, Y, X + 1).astype("float32"), { "units": "m/s", "coordinates": "XU YU RC iter", @@ -463,7 +463,7 @@ def _MITgcm_netcdf(): ), "V": ( ["T", "Z", "Yp1", "X"], - np.random.rand(T, Z, Y + 1, X, dtype="float32"), + np.random.rand(T, Z, Y + 1, X).astype("float32"), { "units": "m/s", "coordinates": "XV YV RC iter", @@ -471,7 +471,7 @@ def _MITgcm_netcdf(): ), "W": ( ["T", "Zl", "Y", "X"], - np.random.rand(T, Z, Y, X, 
dtype="float32"), + np.random.rand(T, Z, Y, X).astype("float32"), { "units": "m/s", "coordinates": "XC YC RC iter", @@ -479,7 +479,7 @@ def _MITgcm_netcdf(): ), "Temp": ( ["T", "Z", "Y", "X"], - np.random.rand(T, Z, Y, X, dtype="float32"), + np.random.rand(T, Z, Y, X).astype("float32"), { "units": "degC", "coordinates": "XC YC RC iter", @@ -556,7 +556,7 @@ def _ERA5_wind(): { "u10": ( ["time", "latitude", "longitude"], - np.random.rand(T, Y, X, dtype="float32"), + np.random.rand(T, Y, X).astype("float32"), { "long_name": "10 metre U wind component", "units": "m s**-1", @@ -564,7 +564,7 @@ def _ERA5_wind(): ), "v10": ( ["time", "latitude", "longitude"], - np.random.rand(T, Y, X, dtype="float32"), + np.random.rand(T, Y, X).astype("float32"), { "long_name": "10 metre V wind component", "units": "m s**-1", @@ -605,7 +605,7 @@ def _FES_tides(): { "Ug": ( ["lat", "lon"], - np.random.rand(Y, X, dtype="float32"), + np.random.rand(Y, X).astype("float32"), { "long_name": "Eastward sea water velocity phaselag due to non equilibrium ocean tide at m2 frequency", "units": "degrees", @@ -614,7 +614,7 @@ def _FES_tides(): ), "Ua": ( ["lat", "lon"], - np.random.rand(Y, X, dtype="float32"), + np.random.rand(Y, X).astype("float32"), { "long_name": "Eastward sea water velocity amplitude due to non equilibrium ocean tide at m2 frequency", "units": "cm/s", @@ -657,7 +657,7 @@ def _hycom_espc(): { "water_u": ( ["time", "depth", "lat", "lon"], - np.random.rand(T, Z, Y, X, dtype="float32"), + np.random.rand(T, Z, Y, X).astype("float32"), { "long_name": "Eastward Water Velocity", "standard_name": "eastward_sea_water_velocity", @@ -739,7 +739,7 @@ def _ecco4(): { "UVEL": ( ["time", "k", "tile", "j", "i_g"], - np.random.rand(T, Z, 13, Y, X, dtype="float32"), + np.random.rand(T, Z, 13, Y, X).astype("float32"), { "long_name": "Horizontal velocity in the model +x direction", "units": "m s-1", @@ -754,7 +754,7 @@ def _ecco4(): ), "VVEL": ( ["time", "k", "tile", "j_g", "i"], - np.random.rand(T, Z, 
13, Y, X, dtype="float32"), + np.random.rand(T, Z, 13, Y, X).astype("float32"), { "long_name": "Horizontal velocity in the model +y direction", "units": "m s-1", @@ -769,7 +769,7 @@ def _ecco4(): ), "WVEL": ( ["time", "k_l", "tile", "j", "i"], - np.random.rand(T, Z, 13, Y, X, dtype="float32"), + np.random.rand(T, Z, 13, Y, X).astype("float32"), { "long_name": "Vertical velocity", "units": "m s-1", @@ -964,7 +964,7 @@ def _CROCO_idealized(): { "u": ( ["time", "s_rho", "eta_rho", "xi_u"], - np.random.rand(T, Z, Y, X - 1, dtype="float32"), + np.random.rand(T, Z, Y, X - 1).astype("float32"), { "long_name": "u-momentum component", "units": "meter second-1", @@ -974,7 +974,7 @@ def _CROCO_idealized(): ), "v": ( ["time", "s_rho", "eta_v", "xi_rho"], - np.random.rand(T, Z, Y - 1, X, dtype="float32"), + np.random.rand(T, Z, Y - 1, X).astype("float32"), { "long_name": "v-momentum component", "units": "meter second-1", @@ -984,7 +984,7 @@ def _CROCO_idealized(): ), "w": ( ["time", "s_rho", "eta_rho", "xi_rho"], - np.random.rand(T, Z, Y, X, dtype="float32"), + np.random.rand(T, Z, Y, X).astype("float32"), { "long_name": "vertical momentum component", "units": "meter second-1", @@ -995,7 +995,7 @@ def _CROCO_idealized(): ), "h": ( ["eta_rho", "xi_rho"], - np.random.rand(Y, X, dtype="float32"), + np.random.rand(Y, X).astype("float32"), { "long_name": "bathymetry at RHO-points", "units": "meter", @@ -1005,7 +1005,7 @@ def _CROCO_idealized(): ), "zeta": ( ["time", "eta_rho", "xi_rho"], - np.random.rand(T, Y, X, dtype="float32"), + np.random.rand(T, Y, X).astype("float32"), { "long_name": "free-surface", "units": "meter", @@ -1015,7 +1015,7 @@ def _CROCO_idealized(): ), "Cs_w": ( ["s_w"], - np.random.rand(Z + 1, dtype="float32"), + np.random.rand(Z + 1).astype("float32"), { "long_name": "S-coordinate stretching curves at W-points", }, From 1a6a5f38b0bf01768b54ccae196288e4726ca69a Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 3 Jul 
2025 13:08:52 +0200 Subject: [PATCH 21/24] Replace arange with linspace --- .../structured/circulation_models.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index 326f848357..19c4b30267 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -669,7 +669,7 @@ def _hycom_espc(): ), "tau": ( ["time"], - np.arange(0, 24, T, dtype="float64"), + np.linspace(0, 24, T, dtype="float64"), { "long_name": "Tau", "units": "hours since analysis", @@ -735,11 +735,12 @@ def _hycom_espc(): def _ecco4(): """ECCO V4r4 model dataset (from https://podaac.jpl.nasa.gov/dataset/ECCO_L4_OCEAN_VEL_LLC0090GRID_DAILY_V4R4#capability-modal-download)""" + tiles = 13 return xr.Dataset( { "UVEL": ( ["time", "k", "tile", "j", "i_g"], - np.random.rand(T, Z, 13, Y, X).astype("float32"), + np.random.rand(T, Z, tiles, Y, X).astype("float32"), { "long_name": "Horizontal velocity in the model +x direction", "units": "m s-1", @@ -754,7 +755,7 @@ def _ecco4(): ), "VVEL": ( ["time", "k", "tile", "j_g", "i"], - np.random.rand(T, Z, 13, Y, X).astype("float32"), + np.random.rand(T, Z, tiles, Y, X).astype("float32"), { "long_name": "Horizontal velocity in the model +y direction", "units": "m s-1", @@ -769,7 +770,7 @@ def _ecco4(): ), "WVEL": ( ["time", "k_l", "tile", "j", "i"], - np.random.rand(T, Z, 13, Y, X).astype("float32"), + np.random.rand(T, Z, tiles, Y, X).astype("float32"), { "long_name": "Vertical velocity", "units": "m s-1", @@ -796,7 +797,7 @@ def _ecco4(): ), "tile": ( ["tile"], - np.arange(13, dtype="int32"), + np.arange(tiles, dtype="int32"), { "long_name": "lat-lon-cap tile index", "coverage_content_type": "coordinate", @@ -899,7 +900,7 @@ def _ecco4(): "YC": ( ["tile", "j", "i"], np.tile( - np.tile(np.linspace(-89, 89, Y), (X, 1)).T, (13, 1, 1) + np.tile(np.linspace(-89, 89, Y), (X, 
1)).T, (tiles, 1, 1) ), # NOTE this grid is not correct, as duplicates for each tile { "long_name": "latitude of tracer grid cell center", @@ -914,7 +915,7 @@ def _ecco4(): "YG": ( ["tile", "j_g", "i_g"], np.tile( - np.tile(np.linspace(-89, 89, Y), (X, 1)).T, (13, 1, 1) + np.tile(np.linspace(-89, 89, Y), (X, 1)).T, (tiles, 1, 1) ), # NOTE this grid is not correct, as duplicates for each tile { "long_name": "latitude of 'southwest' corner of tracer grid cell", @@ -928,7 +929,7 @@ def _ecco4(): "XC": ( ["tile", "j", "i"], np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)).reshape( - 13, Y, X + tiles, Y, X ), # NOTE this grid is not correct, as duplicates for each tile { "long_name": "longitude of tracer grid cell center", @@ -943,7 +944,7 @@ def _ecco4(): "XG": ( ["tile", "j_g", "i_g"], np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)).reshape( - 13, Y, X + tiles, Y, X ), # NOTE this grid is not correct, as duplicates for each tile { "long_name": "longitude of 'southwest' corner of tracer grid cell", From 7e634045344948c0034e10576b26f0bcbb4e1282 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 3 Jul 2025 16:55:24 +0200 Subject: [PATCH 22/24] Update parcels._datasets docstring --- parcels/_datasets/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/parcels/_datasets/__init__.py b/parcels/_datasets/__init__.py index c1270f0017..29db98c5fe 100644 --- a/parcels/_datasets/__init__.py +++ b/parcels/_datasets/__init__.py @@ -3,7 +3,7 @@ This subpackage uses xarray to generate *idealised* structured and unstructured hydrodynamical datasets that are compatible with Parcels. The goals are three-fold: -1. To provide users with documentation for the types of datasets they can expect Parcels to work with. +1. To provide users with documentation for the types of datasets they can expect Parcels to work with. 
When reporting bugs, users can use these datasets to reproduce the bug they're experiencing (allowing developers to quickly troubleshoot the problem). 2. To supply our tutorials with hydrodynamical datasets. 3. To offer developers datasets for use in test cases. @@ -36,8 +36,10 @@ This subpackage is broken down into structured and unstructured parts. Each of these have common submodules: -* ``circulation_model`` -> hardcoded datasets with the intention of mimicking datasets from a certain (ocean) circulation model +* ``circulation_model`` -> hardcoded datasets with the intention of mimicking dataset structure from a certain (ocean) circulation model. If you'd like to see Parcels support a new model, please open an issue in our issue tracker. + * exposes a dict ``datasets`` mapping dataset names to xarray datasets * ``generic`` -> hardcoded datasets that are generic, and not tied to a certain (ocean) circulation model. Instead these focus on the fundamental properties of the dataset + * exposes a dict ``datasets`` mapping dataset names to xarray datasets * ``generated`` -> functions to generate datasets with varying properties * ``utils`` -> any utility functions necessary related to either generating or validating datasets From 89fc0a125c692b2c857da355c17df7805e015cc6 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 3 Jul 2025 16:55:55 +0200 Subject: [PATCH 23/24] Add dataset comparison tooling And remove redundant docstring --- parcels/_datasets/structured/generic.py | 2 - parcels/_datasets/utils.py | 92 +++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 2 deletions(-) diff --git a/parcels/_datasets/structured/generic.py b/parcels/_datasets/structured/generic.py index 72f070e384..2432f079c0 100644 --- a/parcels/_datasets/structured/generic.py +++ b/parcels/_datasets/structured/generic.py @@ -1,5 +1,3 @@ -"""Datasets focussing on grid geometry""" - import numpy as np import xarray as xr diff --git
a/parcels/_datasets/utils.py b/parcels/_datasets/utils.py index 19c64643d9..0ced45baee 100644 --- a/parcels/_datasets/utils.py +++ b/parcels/_datasets/utils.py @@ -65,3 +65,95 @@ def dataset_repr_diff(ds1: xr.Dataset, ds2: xr.Dataset) -> str: diff = difflib.ndiff(repr1.splitlines(keepends=True), repr2.splitlines(keepends=True)) return "".join(diff) + + +def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2"): + print(f"Comparing {ds1_name} and {ds2_name}\n") + + # Compare dataset attributes + print("Dataset Attributes Comparison:") + if ds1.attrs == ds2.attrs: + print(" Dataset attributes are identical.") + else: + print(" Dataset attributes differ.") + for attr_name in set(ds1.attrs.keys()) | set(ds2.attrs.keys()): + if attr_name not in ds1.attrs: + print(f" Attribute '{attr_name}' only in {ds2_name}") + elif attr_name not in ds2.attrs: + print(f" Attribute '{attr_name}' only in {ds1_name}") + elif ds1.attrs[attr_name] != ds2.attrs[attr_name]: + print(f" Attribute '{attr_name}' differs:") + print(f" {ds1_name}: {ds1.attrs[attr_name]}") + print(f" {ds2_name}: {ds2.attrs[attr_name]}") + print("-" * 30) + + # Compare dimensions + print("Dimensions Comparison:") + ds1_dims = set(ds1.dims) + ds2_dims = set(ds2.dims) + if ds1_dims == ds2_dims: + print(" Dimension names are identical.") + else: + print(" Dimension names differ:") + print(f" {ds1_name} dims: {sorted(list(ds1_dims))}") + print(f" {ds2_name} dims: {sorted(list(ds2_dims))}") + + # For common dimensions, compare order (implicit by comparing coordinate values for sortedness) + # and size (though size is parameterized and expected to be different) + for dim_name in ds1_dims.intersection(ds2_dims): + print(f" Dimension '{dim_name}':") + # Sizes will differ due to DIM_SIZE, so we don't strictly compare them. 
+ print(f" {ds1_name} size: {ds1.dims[dim_name]}, {ds2_name} size: {ds2.dims[dim_name]}") + # Check if coordinates associated with dimensions are sorted (increasing) + if dim_name in ds1.coords and dim_name in ds2.coords: + is_ds1_sorted = np.all(np.diff(ds1[dim_name].values) >= 0) if len(ds1[dim_name].values) > 1 else True + is_ds2_sorted = np.all(np.diff(ds2[dim_name].values) >= 0) if len(ds2[dim_name].values) > 1 else True + if is_ds1_sorted == is_ds2_sorted: + print(f" Order for '{dim_name}' is consistent (both sorted: {is_ds1_sorted})") + else: + print( + f" Order for '{dim_name}' differs: {ds1_name} sorted: {is_ds1_sorted}, {ds2_name} sorted: {is_ds2_sorted}" + ) + print("-" * 30) + + # Compare variables (name, attributes, dimensions used) + print("Variables Comparison:") + ds1_vars = set(ds1.variables.keys()) + ds2_vars = set(ds2.variables.keys()) + + if ds1_vars == ds2_vars: + print(" Variable names are identical.") + else: + print(" Variable names differ:") + print(f" {ds1_name} vars: {sorted(list(ds1_vars - ds2_vars))}") + print(f" {ds2_name} vars: {sorted(list(ds2_vars - ds1_vars))}") + print(f" Common vars: {sorted(list(ds1_vars.intersection(ds2_vars)))}") + + for var_name in ds1_vars.intersection(ds2_vars): + print(f" Variable '{var_name}':") + var1 = ds1[var_name] + var2 = ds2[var_name] + + # Compare attributes + if var1.attrs == var2.attrs: + print(" Attributes are identical.") + else: + print(" Attributes differ.") + for attr_name in set(var1.attrs.keys()) | set(var2.attrs.keys()): + if attr_name not in var1.attrs: + print(f" Attribute '{attr_name}' only in {ds2_name}'s '{var_name}'") + elif attr_name not in var2.attrs: + print(f" Attribute '{attr_name}' only in {ds1_name}'s '{var_name}'") + elif var1.attrs[attr_name] != var2.attrs[attr_name]: + print(f" Attribute '{attr_name}' differs for '{var_name}':") + print(f" {ds1_name}: {var1.attrs[attr_name]}") + print(f" {ds2_name}: {var2.attrs[attr_name]}") + + # Compare dimensions used by the variable + 
if var1.dims == var2.dims: + print(f" Dimensions used are identical: {var1.dims}") + else: + print(" Dimensions used differ:") + print(f" {ds1_name}: {var1.dims}") + print(f" {ds2_name}: {var2.dims}") + print("=" * 30 + " End of Comparison " + "=" * 30) From 52a490d22da54c7caf91e26a24ad37ad22ba2f9c Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Fri, 4 Jul 2025 08:07:15 +0200 Subject: [PATCH 24/24] Fixing ecco tiles lat_grid and lon_grid --- .../structured/circulation_models.py | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index 19c4b30267..d34bcdc058 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -736,6 +736,12 @@ def _hycom_espc(): def _ecco4(): """ECCO V4r4 model dataset (from https://podaac.jpl.nasa.gov/dataset/ECCO_L4_OCEAN_VEL_LLC0090GRID_DAILY_V4R4#capability-modal-download)""" tiles = 13 + lon_grid = np.tile( + np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), (tiles, 1, 1) + ) # NOTE this grid is not correct, as duplicates for each tile + lat_grid = np.tile( + np.tile(np.linspace(-89, 89, Y), (X, 1)).T, (tiles, 1, 1) + ) # NOTE this grid is not correct, as duplicates for each tile return xr.Dataset( { "UVEL": ( @@ -899,9 +905,7 @@ def _ecco4(): ), "YC": ( ["tile", "j", "i"], - np.tile( - np.tile(np.linspace(-89, 89, Y), (X, 1)).T, (tiles, 1, 1) - ), # NOTE this grid is not correct, as duplicates for each tile + lat_grid, { "long_name": "latitude of tracer grid cell center", "standard_name": "latitude", @@ -914,9 +918,7 @@ def _ecco4(): ), "YG": ( ["tile", "j_g", "i_g"], - np.tile( - np.tile(np.linspace(-89, 89, Y), (X, 1)).T, (tiles, 1, 1) - ), # NOTE this grid is not correct, as duplicates for each tile + lat_grid, { "long_name": "latitude of 'southwest' corner of tracer grid cell", "standard_name": "latitude", @@ -928,9 
+930,7 @@ def _ecco4(): ), "XC": ( ["tile", "j", "i"], - np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)).reshape( - tiles, Y, X - ), # NOTE this grid is not correct, as duplicates for each tile + lon_grid, { "long_name": "longitude of tracer grid cell center", "standard_name": "longitude", @@ -943,9 +943,7 @@ def _ecco4(): ), "XG": ( ["tile", "j_g", "i_g"], - np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)).reshape( - tiles, Y, X - ), # NOTE this grid is not correct, as duplicates for each tile + lon_grid, { "long_name": "longitude of 'southwest' corner of tracer grid cell", "standard_name": "longitude",