diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index d34bcdc058..c4eec14bc3 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -7,7 +7,7 @@ __all__ = ["T", "X", "Y", "Z", "datasets"] -TIME = xr.date_range("2000", "2001", T) +TIME = np.datetime64("2000-01-01") + np.arange(T) * np.timedelta64(1, "D") def _copernicusmarine(): @@ -47,7 +47,7 @@ def _copernicusmarine(): "unit_long": "Meters", "units": "m", "axis": "Z", - "long_name": "depth", + "long_name": "Depth", "standard_name": "depth", "positive": "down", }, @@ -88,7 +88,7 @@ def _copernicusmarine(): ) -def _copernicusmarine_globcurrents(): +def _copernicusmarine_globcurrent(): """Copernicus Marine Service GlobCurrent dataset (MULTIOBS_GLO_PHY_MYNRT_015_003)""" return xr.Dataset( { @@ -121,6 +121,7 @@ def _copernicusmarine_globcurrents(): "standard_name": "depth", "long_name": "Depth", "units": "m", + "unit_long": "Meters", "axis": "Z", "positive": "down", }, @@ -172,7 +173,6 @@ def _NEMO_MOI_U(): "valid_min": -10.0, "valid_max": 10.0, "long_name": "Zonal velocity", - "unit_long": "Meters per second", "standard_name": "sea_water_x_velocity", "short_name": "vozocrtx", "online_operation": "N/A", @@ -241,16 +241,6 @@ def _NEMO_MOI_U(): "units": "1", }, ), - "time_counter": ( - ["time_counter"], - np.empty(0, dtype="datetime64[ns]"), - { - "standard_name": "time", - "long_name": "Time axis", - "axis": "T", - "time_origin": "1950-JAN-01 00:00:00", - }, - ), "deptht": ( ["deptht"], np.linspace(1, 5500, Z, dtype="float64"), @@ -280,7 +270,6 @@ def _NEMO_MOI_V(): "valid_min": -10.0, "valid_max": 10.0, "long_name": "Meridional velocity", - "unit_long": "Meters per second", "standard_name": "sea_water_y_velocity", "short_name": "vomecrty", "online_operation": "N/A", @@ -296,8 +285,8 @@ def _NEMO_MOI_V(): np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), # note that this is not curvilinear { "units": "degrees_east", - "valid_min": -179.99984754002182, - "valid_max": 179.999842386314, + "valid_min": -179.9999951021171, + "valid_max": 180.0, "long_name": "Longitude", "nav_model": "Default grid", "standard_name": "longitude", @@ -308,8 +297,8 @@ def _NEMO_MOI_V(): np.tile(np.linspace(-75, 85, Y).reshape(-1, 1), (1, X)), # note that this is not curvilinear { "units": "degrees_north", - "valid_min": -77.0104751586914, - "valid_max": 89.9591064453125, + "valid_min": -77.00110752801133, + "valid_max": 89.95529158641207, "long_name": "Latitude", "nav_model": "Default grid", "standard_name": "latitude", @@ -333,16 +322,6 @@ def _NEMO_MOI_V(): "units": "1", }, ), - "time_counter": ( - ["time_counter"], - np.empty(0, dtype="datetime64[ns]"), - { - "standard_name": "time", - "long_name": "Time axis", - "axis": "T", - "time_origin": "1950-JAN-01 00:00:00", - }, - ), "deptht": ( ["deptht"], np.linspace(1, 5500, Z, dtype="float64"), @@ -362,89 +341,87 @@ def _NEMO_MOI_V(): def _CESM(): """CESM model dataset""" - return ( - xr.Dataset( - { - "UVEL": ( - ["time", "z_t", "nlat", "nlon"], - np.random.rand(T, Z, Y, X).astype("float32"), - { - "long_name": "Velocity in grid-x direction", - "units": "centimeter/s", - "grid_loc": 3221, - "cell_methods": "time:mean", - }, - ), - "VVEL": ( - ["time", "z_t", "nlat", "nlon"], - np.random.rand(T, Z, Y, X).astype("float32"), - { - "long_name": "Velocity in grid-y direction", - "units": "centimeter/s", - "grid_loc": 3221, - "cell_methods": "time:mean", - }, - ), - "WVEL": ( - ["time", "z_w_top", "nlat", "nlon"], - np.random.rand(T, Z, Y, X).astype("float32"), - { - "long_name": "Vertical Velocity", - "units": "centimeter/s", - "grid_loc": 3112, - "cell_methods": "time:mean", - }, - ), - }, - coords={ - "time": ( - ["time"], - TIME, - { - "long_name": "time", - "bounds": "time_bounds", - }, - ), - "z_t": ( - ["z_t"], - np.linspace(0, 5000, Z, dtype="float32"), - { - "long_name": "depth from surface to midpoint of layer", - "units": "centimeters", - "positive": "down", - "valid_min": 500.0, - "valid_max": 537500.0, - }, - ), - "z_w_top": ( - ["z_w_top"], - np.linspace(0, 5000, Z, dtype="float32"), - { - "long_name": "depth from surface to top of layer", - "units": "centimeters", - "positive": "down", - "valid_min": 0.0, - "valid_max": 525000.94, - }, - ), - "ULONG": ( - ["nlat", "nlon"], - np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), # note that this is not curvilinear - { - "long_name": "array of u-grid longitudes", - "units": "degrees_east", - }, - ), - "ULAT": ( - ["nlat", "nlon"], - np.tile(np.linspace(-75, 85, Y).reshape(-1, 1), (1, X)), # note that this is not curvilinear - { - "long_name": "array of u-grid latitudes", - "units": "degrees_north", - }, - ), - }, - ), + return xr.Dataset( + { + "UVEL": ( + ["time", "z_t", "nlat", "nlon"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "long_name": "Velocity in grid-x direction", + "units": "centimeter/s", + "grid_loc": "3221", + "cell_methods": "time: mean", + }, + ), + "VVEL": ( + ["time", "z_t", "nlat", "nlon"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "long_name": "Velocity in grid-y direction", + "units": "centimeter/s", + "grid_loc": "3221", + "cell_methods": "time: mean", + }, + ), + "WVEL": ( + ["time", "z_w_top", "nlat", "nlon"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "long_name": "Vertical Velocity", + "units": "centimeter/s", + "grid_loc": "3112", + "cell_methods": "time: mean", + }, + ), + }, + coords={ + "time": ( + ["time"], + np.linspace(0, 5000, T), + { + "long_name": "time", + "bounds": "time_bound", + }, + ), + "z_t": ( + ["z_t"], + np.linspace(0, 5000, Z, dtype="float32"), + { + "long_name": "depth from surface to midpoint of layer", + "units": "centimeters", + "positive": "down", + "valid_min": 500.0, + "valid_max": 537500.0, + }, + ), + "z_w_top": ( + ["z_w_top"], + np.linspace(0, 5000, Z, dtype="float32"), + { + "long_name": "depth from surface to top of layer", + "units": "centimeters", + "positive": "down", + "valid_min": 0.0, + "valid_max": 525000.9375, + }, + ), + "ULONG": ( + ["nlat", "nlon"], + np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), # note that this is not curvilinear + { + "long_name": "array of u-grid longitudes", + "units": "degrees_east", + }, + ), + "ULAT": ( + ["nlat", "nlon"], + np.tile(np.linspace(-75, 85, Y).reshape(-1, 1), (1, X)), # note that this is not curvilinear + { + "long_name": "array of u-grid latitudes", + "units": "degrees_north", + }, + ), + }, ) @@ -550,6 +527,142 @@ def _MITgcm_netcdf(): ) +def _MITgcm_mds(): + """MITgcm model dataset in native MDS format""" + return xr.Dataset( + { + "U": ( + ["time", "Z", "YC", "XG"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "standard_name": "sea_water_x_velocity", + "mate": "V", + "long_name": "Zonal Component of Velocity", + "units": "m s-1", + }, + ), + "V": ( + ["time", "Z", "YG", "XC"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "standard_name": "sea_water_y_velocity", + "mate": "U", + "long_name": "Meridional Component of Velocity", + "units": "m s-1", + }, + ), + "W": ( + ["time", "Zl", "YC", "XC"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "standard_name": "sea_water_z_velocity", + "long_name": "Vertical Component of Velocity", + "units": "m s-1", + }, + ), + "S": ( + ["time", "Z", "YC", "XC"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "standard_name": "sea_water_salinity", + "long_name": "Salinity", + "units": "g kg-1", + }, + ), + "T": ( + ["time", "Z", "YC", "XC"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "standard_name": "sea_water_potential_temperature", + "long_name": "Potential Temperature", + "units": "degree_Celcius", + }, + ), + }, + coords={ + "time": ( + ["time"], + np.arange(T) * np.timedelta64(1, "D"), + { + "standard_name": "time", + "long_name": "Time", + "axis": "T", + "calendar": "gregorian", + }, + ), + "Z": ( + ["Z"], + np.linspace(-25, -5000, Z, dtype="float64"), + { + "standard_name": "depth", + "long_name": "vertical coordinate of cell center", + "units": "m", + "positive": "down", + "axis": "Z", + }, + ), + "Zl": ( + ["Zl"], + np.linspace(0, -4500, Z, dtype="float64"), + { + "standard_name": "depth_at_lower_w_location", + "long_name": "vertical coordinate of lower cell interface", + "units": "m", + "positive": "down", + "axis": "Z", + "c_grid_axis_shift": -0.5, + }, + ), + "YC": ( + ["YC"], + np.linspace(500, 5000, Y, dtype="float64"), + { + "standard_name": "latitude", + "long_name": "latitude", + "units": "degrees_north", + "coordinate": "YC XC", + "axis": "Y", + }, + ), + "YG": ( + ["YG"], + np.linspace(0, 5000, Y, dtype="float64"), + { + "standard_name": "latitude_at_f_location", + "long_name": "latitude", + "units": "degrees_north", + "coordinate": "YG XG", + "axis": "Y", + "c_grid_axis_shift": -0.5, + }, + ), + "XC": ( + ["XC"], + np.linspace(500, 5000, X, dtype="float64"), + { + "standard_name": "longitude", + "long_name": "longitude", + "units": "degrees_east", + "coordinate": "YC XC", + "axis": "X", + }, + ), + "XG": ( + ["XG"], + np.linspace(0, 5000, X, dtype="float64"), + { + "standard_name": "longitude_at_f_location", + "long_name": "longitude", + "units": "degrees_east", + "coordinate": "YG XG", + "axis": "X", + "c_grid_axis_shift": -0.5, + }, + ), + }, + ) + + def _ERA5_wind(): """ERA5 10m wind model dataset""" return xr.Dataset( @@ -663,7 +776,7 @@ def _hycom_espc(): "standard_name": "eastward_sea_water_velocity", "units": "m/s", "NAVO_code": 17, - "actual_range": [-3.3700001, 3.6840003], + "actual_range": np.array([-3.3700001, 3.6840003], dtype="float32"), "cell_methods": "time: mean", }, ), @@ -925,7 +1038,7 @@ def _ecco4(): "units": "degrees_north", "coordinate": "YG XG", "coverage_content_type": "coordinate", - "comment": "Nonuniform grid spacing. Note: 'southwest' does not correspond to geographic orientation but is used for convenience to describe the computational grid. See MITgcm documentation for details.", + "comment": "Nonuniform grid spacing. Note: 'southwest' does not correspond to geographic orientation but is used for convenience to describe the computational grid. See MITgcm dcoumentation for details.", }, ), "XC": ( @@ -950,7 +1063,7 @@ def _ecco4(): "units": "degrees_east", "coordinate": "YG XG", "coverage_content_type": "coordinate", - "comment": "Nonuniform grid spacing. Note: 'southwest' does not correspond to geographic orientation but is used for convenience to describe the computational grid. See MITgcm documentation for details.", + "comment": "Nonuniform grid spacing. Note: 'southwest' does not correspond to geographic orientation but is used for convenience to describe the computational grid. See MITgcm dcoumentation for details.", }, ), }, @@ -1008,7 +1121,7 @@ def _CROCO_idealized(): { "long_name": "free-surface", "units": "meter", - "field": "free_surface, scalar, series", + "field": "free-surface, scalar, series", "standard_name": "sea_surface_height", }, ), @@ -1067,7 +1180,7 @@ def _CROCO_idealized(): ["eta_rho"], np.arange(Y, dtype="float32"), { - "long name": "y-dimension of the grid", + "long_name": "y-dimension of the grid", "standard_name": "y_grid_index", "axis": "Y", "c_grid_dynamic_range": f"2:{Y}", @@ -1077,8 +1190,8 @@ def _CROCO_idealized(): ["eta_v"], np.arange(Y - 1, dtype="float32"), { - "long name": "y-dimension of the grid at v location", - "standard_name": "y_grid_index_at_v_location", + "long_name": "y-dimension of the grid at v location", + "standard_name": "x_grid_index_at_v_location", "axis": "Y", "c_grid_axis_shift": 0.5, "c_grid_dynamic_range": f"2:{Y - 1}", @@ -1088,7 +1201,7 @@ def _CROCO_idealized(): ["xi_rho"], np.arange(X, dtype="float32"), { - "long name": "x-dimension of the grid", + "long_name": "x-dimension of the grid", "standard_name": "x_grid_index", "axis": "X", "c_grid_dynamic_range": f"2:{X}", @@ -1098,7 +1211,7 @@ def _CROCO_idealized(): ["xi_u"], np.arange(X - 1, dtype="float32"), { - "long name": "x-dimension of the grid at u location", + "long_name": "x-dimension of the grid at u location", "standard_name": "x_grid_index_at_u_location", "axis": "X", "c_grid_axis_shift": 0.5, @@ -1122,7 +1235,7 @@ def _CROCO_idealized(): "long_name": "y-locations of RHO-points", "units": "meter", "standard_name": "plane_y_coordinate", - "field": "y_rho, scalar", + "field": "y_rho, scal", }, ), }, @@ -1131,11 +1244,12 @@ def _CROCO_idealized(): datasets = { "ds_copernicusmarine": _copernicusmarine(), - "ds_copernicusmarine_globcurrents": _copernicusmarine_globcurrents(), + "ds_copernicusmarine_globcurrent": _copernicusmarine_globcurrent(), "ds_NEMO_MOI_U": _NEMO_MOI_U(), "ds_NEMO_MOI_V": _NEMO_MOI_V(), "ds_CESM": _CESM(), "ds_MITgcm_netcdf": _MITgcm_netcdf(), + "ds_MITgcm_mds": _MITgcm_mds(), "ds_ERA5_wind": _ERA5_wind(), "ds_FES_tides": _FES_tides(), "ds_hycom_espc": _hycom_espc(), diff --git a/parcels/_datasets/utils.py b/parcels/_datasets/utils.py index 0ced45baee..f215377f2c 100644 --- a/parcels/_datasets/utils.py +++ b/parcels/_datasets/utils.py @@ -67,13 +67,32 @@ def dataset_repr_diff(ds1: xr.Dataset, ds2: xr.Dataset) -> str: return "".join(diff) -def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2"): +def _dicts_equal(d1, d2): + # compare two dictionaries, including when their entries are lists or arrays ( == throws an error then) + if d1.keys() != d2.keys(): + return False + for k in d1: + v1, v2 = d1[k], d2[k] + # Compare lists or arrays element-wise + if isinstance(v1, (list, np.ndarray)) and isinstance(v2, (list, np.ndarray)): + if not np.array_equal(np.array(v1), np.array(v2)): + return False + else: + if v1 != v2: + return False + return True + + +def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2", verbose=True): print(f"Comparing {ds1_name} and {ds2_name}\n") - # Compare dataset attributes - print("Dataset Attributes Comparison:") + def verbose_print(*args, **kwargs): + if verbose: + print(*args, **kwargs) + + verbose_print("Dataset Attributes Comparison:") if ds1.attrs == ds2.attrs: - print(" Dataset attributes are identical.") + verbose_print(" Dataset attributes are identical.") else: print(" Dataset attributes differ.") for attr_name in set(ds1.attrs.keys()) | set(ds2.attrs.keys()): @@ -85,14 +104,14 @@ def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2"): print(f" Attribute '{attr_name}' differs:") print(f" {ds1_name}: {ds1.attrs[attr_name]}") print(f" {ds2_name}: {ds2.attrs[attr_name]}") - print("-" * 30) + verbose_print("-" * 30) # Compare dimensions - print("Dimensions Comparison:") + verbose_print("Dimensions Comparison:") ds1_dims = set(ds1.dims) ds2_dims = set(ds2.dims) if ds1_dims == ds2_dims: - print(" Dimension names are identical.") + verbose_print(" Dimension names are identical.") else: print(" Dimension names differ:") print(f" {ds1_name} dims: {sorted(list(ds1_dims))}") @@ -101,28 +120,35 @@ def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2"): # For common dimensions, compare order (implicit by comparing coordinate values for sortedness) # and size (though size is parameterized and expected to be different) for dim_name in ds1_dims.intersection(ds2_dims): - print(f" Dimension '{dim_name}':") + verbose_print(f" Dimension '{dim_name}':") # Sizes will differ due to DIM_SIZE, so we don't strictly compare them. - print(f" {ds1_name} size: {ds1.dims[dim_name]}, {ds2_name} size: {ds2.dims[dim_name]}") + verbose_print(f" {ds1_name} size: {ds1.dims[dim_name]}, {ds2_name} size: {ds2.dims[dim_name]}") # Check if coordinates associated with dimensions are sorted (increasing) if dim_name in ds1.coords and dim_name in ds2.coords: - is_ds1_sorted = np.all(np.diff(ds1[dim_name].values) >= 0) if len(ds1[dim_name].values) > 1 else True - is_ds2_sorted = np.all(np.diff(ds2[dim_name].values) >= 0) if len(ds2[dim_name].values) > 1 else True + check_val = ( + np.timedelta64(0, "s") if isinstance(ds1[dim_name].values[0], (np.datetime64, np.timedelta64)) else 0.0 + ) + is_ds1_sorted = ( + np.all(np.diff(ds1[dim_name].values) >= check_val) if len(ds1[dim_name].values) > 1 else True + ) + is_ds2_sorted = ( + np.all(np.diff(ds2[dim_name].values) >= check_val) if len(ds2[dim_name].values) > 1 else True + ) if is_ds1_sorted == is_ds2_sorted: - print(f" Order for '{dim_name}' is consistent (both sorted: {is_ds1_sorted})") + verbose_print(f" Order for '{dim_name}' is consistent (both sorted: {is_ds1_sorted})") else: print( f" Order for '{dim_name}' differs: {ds1_name} sorted: {is_ds1_sorted}, {ds2_name} sorted: {is_ds2_sorted}" ) - print("-" * 30) + verbose_print("-" * 30) # Compare variables (name, attributes, dimensions used) - print("Variables Comparison:") + verbose_print("Variables Comparison:") ds1_vars = set(ds1.variables.keys()) ds2_vars = set(ds2.variables.keys()) if ds1_vars == ds2_vars: - print(" Variable names are identical.") + verbose_print(" Variable names are identical.") else: print(" Variable names differ:") print(f" {ds1_name} vars: {sorted(list(ds1_vars - ds2_vars))}") @@ -130,13 +156,13 @@ def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2"): print(f" Common vars: {sorted(list(ds1_vars.intersection(ds2_vars)))}") for var_name in ds1_vars.intersection(ds2_vars): - print(f" Variable '{var_name}':") + verbose_print(f" Variable '{var_name}':") var1 = ds1[var_name] var2 = ds2[var_name] # Compare attributes - if var1.attrs == var2.attrs: - print(" Attributes are identical.") + if _dicts_equal(var1.attrs, var2.attrs): + verbose_print(" Attributes are identical.") else: print(" Attributes differ.") for attr_name in set(var1.attrs.keys()) | set(var2.attrs.keys()): @@ -151,9 +177,9 @@ def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2"): # Compare dimensions used by the variable if var1.dims == var2.dims: - print(f" Dimensions used are identical: {var1.dims}") + verbose_print(f" Dimensions used are identical: {var1.dims}") else: print(" Dimensions used differ:") print(f" {ds1_name}: {var1.dims}") print(f" {ds2_name}: {var2.dims}") - print("=" * 30 + " End of Comparison " + "=" * 30) + verbose_print("=" * 30 + " End of Comparison " + "=" * 30)