diff --git a/parcels/examples/example_dask_chunk_OCMs.py b/parcels/examples/example_dask_chunk_OCMs.py
new file mode 100644
index 0000000000..18b022595e
--- /dev/null
+++ b/parcels/examples/example_dask_chunk_OCMs.py
@@ -0,0 +1,165 @@
+import math
+from datetime import timedelta as delta
+from glob import glob
+from os import path
+
+import numpy as np
+import pytest
+import dask
+
+from parcels import AdvectionRK4
+from parcels import FieldSet
+from parcels import JITParticle
+from parcels import ParticleFile
+from parcels import ParticleSet
+from parcels import ScipyParticle
+
+ptype = {'scipy': ScipyParticle, 'jit': JITParticle}
+
+
+def fieldset_from_nemo_3D(chunk_mode):
+    data_path = path.join(path.dirname(__file__), 'NemoNorthSeaORCA025-N006_data/')
+    ufiles = sorted(glob(data_path + 'ORCA*U.nc'))
+    vfiles = sorted(glob(data_path + 'ORCA*V.nc'))
+    wfiles = sorted(glob(data_path + 'ORCA*W.nc'))
+    mesh_mask = data_path + 'coordinates.nc'
+
+    filenames = {'U': {'lon': mesh_mask, 'lat': mesh_mask, 'depth': wfiles[0], 'data': ufiles},
+                 'V': {'lon': mesh_mask, 'lat': mesh_mask, 'depth': wfiles[0], 'data': vfiles},
+                 'W': {'lon': mesh_mask, 'lat': mesh_mask, 'depth': wfiles[0], 'data': wfiles}}
+
+    variables = {'U': 'uo',
+                 'V': 'vo',
+                 'W': 'wo'}
+    dimensions = {'U': {'lon': 'glamf', 'lat': 'gphif', 'depth': 'depthw', 'time': 'time_counter'},
+                  'V': {'lon': 'glamf', 'lat': 'gphif', 'depth': 'depthw', 'time': 'time_counter'},
+                  'W': {'lon': 'glamf', 'lat': 'gphif', 'depth': 'depthw', 'time': 'time_counter'}}
+    chs = False
+    if chunk_mode == 'auto':
+        chs = 'auto'
+    elif chunk_mode == 'specific':
+        chs = {'U': {'depthu': 75, 'y': 16, 'x': 16},
+               'V': {'depthv': 75, 'y': 16, 'x': 16},
+               'W': {'depthw': 75, 'y': 16, 'x': 16}}
+
+    fieldset = FieldSet.from_nemo(filenames, variables, dimensions, field_chunksize=chs)
+    return fieldset
+
+
+def fieldset_from_globcurrent(chunk_mode):
+    filename = path.join(path.dirname(__file__), 'GlobCurrent_example_data',
+                         '200201*-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc')
+    variables = {'U': 'eastward_eulerian_current_velocity', 'V': 'northward_eulerian_current_velocity'}
+    dimensions = {'lat': 'lat', 'lon': 'lon', 'time': 'time'}
+    chs = False
+    if chunk_mode == 'auto':
+        chs = 'auto'
+    elif chunk_mode == 'specific':
+        chs = {'U': {'lat': 16, 'lon': 16},
+               'V': {'lat': 16, 'lon': 16}}
+
+    fieldset = FieldSet.from_netcdf(filename, variables, dimensions, field_chunksize=chs)
+    return fieldset
+
+
+# ==== undefined as long as we have no POP example data ==== #
+def test_pop():
+    pass
+
+
+def compute_nemo_particle_advection(field_set, mode, lonp, latp):
+
+    def periodicBC(particle, fieldSet, time):
+        if particle.lon > 15.0:
+            particle.lon -= 15.0
+        if particle.lon < 0:
+            particle.lon += 15.0
+        if particle.lat > 60.0:
+            particle.lat -= 11.0
+        if particle.lat < 49.0:
+            particle.lat += 11.0
+
+    pset = ParticleSet.from_list(field_set, ptype[mode], lon=lonp, lat=latp)
+    pfile = ParticleFile("nemo_particles_chunk", pset, outputdt=delta(days=1))
+    kernels = pset.Kernel(AdvectionRK4) + periodicBC
+    pset.execute(kernels, runtime=delta(days=4), dt=delta(hours=6), output_file=pfile)
+    return pset
+
+
+def compute_globcurrent_particle_advection(field_set, mode, lonp, latp):
+    pset = ParticleSet(field_set, pclass=ptype[mode], lon=lonp, lat=latp)
+    pfile = ParticleFile("globcurrent_particles_chunk", pset, outputdt=delta(hours=2))
+    pset.execute(AdvectionRK4, runtime=delta(days=1), dt=delta(minutes=5), output_file=pfile)
+    return pset
+
+
+@pytest.mark.parametrize('mode', ['jit'])  # Only testing jit as scipy is very slow
+@pytest.mark.parametrize('chunk_mode', [False, 'auto', 'specific'])
+def test_nemo_3D(mode, chunk_mode):
+    if chunk_mode == 'auto':
+        dask.config.set({'array.chunk-size': '2MiB'})
+    else:
+        dask.config.set({'array.chunk-size': '128MiB'})
+    field_set = fieldset_from_nemo_3D(chunk_mode)
+    # Now run particles as normal
+    npart = 20
+    lonp = 5.2 * np.ones(npart)
+    latp = [i for i in 52.0+(-1e-3+np.random.rand(npart)*2.0*1e-3)]
+    compute_nemo_particle_advection(field_set, mode, lonp, latp)
+    # Nemo sample file dimensions: depthu=75, y=201, x=151
+    assert (len(field_set.U.grid.load_chunk) == len(field_set.V.grid.load_chunk))
+    assert (len(field_set.U.grid.load_chunk) == len(field_set.W.grid.load_chunk))
+    if chunk_mode is False:
+        assert (len(field_set.U.grid.load_chunk) == 1)
+    elif chunk_mode == 'auto':
+        assert (len(field_set.U.grid.load_chunk) != 1)
+    elif chunk_mode == 'specific':
+        assert (len(field_set.U.grid.load_chunk) == (1 * int(math.ceil(201.0/16.0)) * int(math.ceil(151.0/16.0))))
+
+
+@pytest.mark.parametrize('mode', ['jit'])  # Only testing jit as scipy is very slow
+@pytest.mark.parametrize('chunk_mode', [False, 'auto', 'specific'])
+def test_globcurrent_2D(mode, chunk_mode):
+    if chunk_mode == 'auto':
+        dask.config.set({'array.chunk-size': '32KiB'})
+    else:
+        dask.config.set({'array.chunk-size': '128MiB'})
+    field_set = fieldset_from_globcurrent(chunk_mode)
+    lonp = [25]
+    latp = [-35]
+    pset = compute_globcurrent_particle_advection(field_set, mode, lonp, latp)
+    # GlobCurrent sample file dimensions: time=UNLIMITED, lat=41, lon=81
+    assert (len(field_set.U.grid.load_chunk) == len(field_set.V.grid.load_chunk))
+    if chunk_mode is False:
+        assert (len(field_set.U.grid.load_chunk) == 1)
+    elif chunk_mode == 'auto':
+        assert (len(field_set.U.grid.load_chunk) != 1)
+    elif chunk_mode == 'specific':
+        assert (len(field_set.U.grid.load_chunk) == (1 * int(math.ceil(41.0/16.0)) * int(math.ceil(81.0/16.0))))
+    assert(abs(pset[0].lon - 23.8) < 1)
+    assert(abs(pset[0].lat - -35.3) < 1)
+
+
+@pytest.mark.parametrize('mode', ['jit'])  # Only testing jit as scipy is very slow
+def test_diff_entry_dimensions_chunks(mode):
+    data_path = path.join(path.dirname(__file__), 'NemoNorthSeaORCA025-N006_data/')
+    ufiles = sorted(glob(data_path + 'ORCA*U.nc'))
+    vfiles = sorted(glob(data_path + 'ORCA*V.nc'))
+    mesh_mask = data_path + 'coordinates.nc'
+
+    filenames = {'U': {'lon': mesh_mask, 'lat': mesh_mask, 'data': ufiles},
+                 'V': {'lon': mesh_mask, 'lat': mesh_mask, 'data': vfiles}}
+    variables = {'U': 'uo',
+                 'V': 'vo'}
+    dimensions = {'U': {'lon': 'glamf', 'lat': 'gphif', 'time': 'time_counter'},
+                  'V': {'lon': 'glamf', 'lat': 'gphif', 'time': 'time_counter'}}
+    chs = {'U': {'depthu': 75, 'y': 16, 'x': 16},
+           'V': {'depthv': 75, 'y': 16, 'x': 16}}
+    fieldset = FieldSet.from_nemo(filenames, variables, dimensions, field_chunksize=chs)
+    # Now run particles as normal
+    npart = 20
+    lonp = 5.2 * np.ones(npart)
+    latp = [i for i in 52.0+(-1e-3+np.random.rand(npart)*2.0*1e-3)]
+    compute_nemo_particle_advection(fieldset, mode, lonp, latp)
+    # Nemo sample file dimensions: depthu=75, y=201, x=151
+    assert (len(fieldset.U.grid.load_chunk) == len(fieldset.V.grid.load_chunk))
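Reviewer note: the three chunk_mode values exercised by the tests above map one-to-one onto the values a user passes as field_chunksize. A minimal call-side sketch; the file pattern and variable names here are hypothetical, only the field_chunksize values are taken from the tests:

    from parcels import FieldSet

    filenames = 'ocean_currents_*.nc'  # hypothetical input files
    variables = {'U': 'uo', 'V': 'vo'}
    dimensions = {'lon': 'lon', 'lat': 'lat', 'time': 'time'}

    # 1) no dask chunking: each field is read as a single block
    fs_none = FieldSet.from_netcdf(filenames, variables, dimensions, field_chunksize=False)
    # 2) let dask pick chunk shapes, bounded by the 'array.chunk-size' config value
    fs_auto = FieldSet.from_netcdf(filenames, variables, dimensions, field_chunksize='auto')
    # 3) explicit per-field, per-dimension chunk sizes
    fs_spec = FieldSet.from_netcdf(filenames, variables, dimensions,
                                   field_chunksize={'U': {'lat': 16, 'lon': 16},
                                                    'V': {'lat': 16, 'lon': 16}})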
diff --git a/parcels/examples/example_nemo_curvilinear.py b/parcels/examples/example_nemo_curvilinear.py
index 40abaeb2c7..13c08ba38c 100644
--- a/parcels/examples/example_nemo_curvilinear.py
+++ b/parcels/examples/example_nemo_curvilinear.py
@@ -1,3 +1,4 @@
+import math
 from argparse import ArgumentParser
 from datetime import timedelta as delta
 from glob import glob
@@ -5,6 +6,7 @@
 
 import numpy as np
 import pytest
+import dask
 
 from parcels import AdvectionRK4
 from parcels import FieldSet
@@ -12,6 +14,7 @@
 from parcels import ParticleFile
 from parcels import ParticleSet
 from parcels import ScipyParticle
+from parcels import ErrorCode
 
 ptype = {'scipy': ScipyParticle, 'jit': JITParticle}
 
@@ -28,7 +31,7 @@ def run_nemo_curvilinear(mode, outfile):
                            'data': data_path + 'V_purely_zonal-ORCA025_grid_V.nc4'}}
     variables = {'U': 'U', 'V': 'V'}
     dimensions = {'lon': 'glamf', 'lat': 'gphif'}
-    field_chunksize = {'lon': 2, 'lat': 2}
+    field_chunksize = {'y': 2, 'x': 2}
     field_set = FieldSet.from_nemo(filenames, variables, dimensions, field_chunksize=field_chunksize)
     assert field_set.U.field_chunksize == field_chunksize
 
@@ -101,6 +104,105 @@ def test_nemo_3D_samegrid():
     assert fieldset.U.dataFiles is not fieldset.W.dataFiles
 
 
+def fieldset_nemo_setup():
+    data_path = path.join(path.dirname(__file__), 'NemoNorthSeaORCA025-N006_data/')
+    ufiles = sorted(glob(data_path + 'ORCA*U.nc'))
+    vfiles = sorted(glob(data_path + 'ORCA*V.nc'))
+    wfiles = sorted(glob(data_path + 'ORCA*W.nc'))
+    mesh_mask = data_path + 'coordinates.nc'
+
+    filenames = {'U': {'lon': mesh_mask, 'lat': mesh_mask, 'depth': wfiles[0], 'data': ufiles},
+                 'V': {'lon': mesh_mask, 'lat': mesh_mask, 'depth': wfiles[0], 'data': vfiles},
+                 'W': {'lon': mesh_mask, 'lat': mesh_mask, 'depth': wfiles[0], 'data': wfiles}}
+    variables = {'U': 'uo',
+                 'V': 'vo',
+                 'W': 'wo'}
+    dimensions = {'U': {'lon': 'glamf', 'lat': 'gphif', 'depth': 'depthw', 'time': 'time_counter'},
+                  'V': {'lon': 'glamf', 'lat': 'gphif', 'depth': 'depthw', 'time': 'time_counter'},
+                  'W': {'lon': 'glamf', 'lat': 'gphif', 'depth': 'depthw', 'time': 'time_counter'}}
+
+    return filenames, variables, dimensions
+
+
+def compute_particle_advection(field_set, mode, lonp, latp):
+
+    def periodicBC(particle, fieldSet, time):
+        if particle.lon > 15.0:
+            particle.lon -= 15.0
+        if particle.lon < 0:
+            particle.lon += 15.0
+        if particle.lat > 60.0:
+            particle.lat -= 11.0
+        if particle.lat < 49.0:
+            particle.lat += 11.0
+
+    def OutOfBounds_reinitialisation(particle, fieldset, time):
+        # Reset to the release location: lon=5.2, lat jittered around 52.0
+        particle.lon = 5.2
+        particle.lat = 52.0 + (-1e-3 + np.random.rand() * 2.0 * 1e-3)
+
+    pset = ParticleSet.from_list(field_set, ptype[mode], lon=lonp, lat=latp)
+    pfile = ParticleFile("nemo_particles", pset, outputdt=delta(days=1))
+    kernels = pset.Kernel(AdvectionRK4) + periodicBC
+    pset.execute(kernels, runtime=delta(days=4), dt=delta(hours=6),
+                 output_file=pfile, recovery={ErrorCode.ErrorOutOfBounds: OutOfBounds_reinitialisation})
+    return pset
+
+
+@pytest.mark.parametrize('mode', ['jit'])  # Only testing jit as scipy is very slow
+def test_nemo_curvilinear_auto_chunking(mode):
+    dask.config.set({'array.chunk-size': '2MiB'})
+    filenames, variables, dimensions = fieldset_nemo_setup()
+    field_set = FieldSet.from_nemo(filenames, variables, dimensions, field_chunksize='auto')
+    assert field_set.U.dataFiles is not field_set.W.dataFiles
+    # Now run particles as normal
+    npart = 20
+    lonp = 5.2 * np.ones(npart)
+    latp = [i for i in 52.0+(-1e-3+np.random.rand(npart)*2.0*1e-3)]
+    compute_particle_advection(field_set, mode, lonp, latp)
+    # Nemo sample file dimensions: depthu=75, y=201, x=151
+    assert (len(field_set.U.grid.load_chunk) == len(field_set.V.grid.load_chunk))
+    assert (len(field_set.U.grid.load_chunk) == len(field_set.W.grid.load_chunk))
+    assert (len(field_set.U.grid.load_chunk) != 1)
+
+
+@pytest.mark.parametrize('mode', ['jit'])  # Only testing jit as scipy is very slow
+def test_nemo_curvilinear_no_chunking(mode):
+    dask.config.set({'array.chunk-size': '128MiB'})
+    filenames, variables, dimensions = fieldset_nemo_setup()
+    field_set = FieldSet.from_nemo(filenames, variables, dimensions, field_chunksize=False)
+    assert field_set.U.dataFiles is not field_set.W.dataFiles
+    # Now run particles as normal
+    npart = 20
+    lonp = 5.2 * np.ones(npart)
+    latp = [i for i in 52.0+(-1e-3+np.random.rand(npart)*2.0*1e-3)]
+    compute_particle_advection(field_set, mode, lonp, latp)
+    # Nemo sample file dimensions: depthu=75, y=201, x=151
+    assert (len(field_set.U.grid.load_chunk) == len(field_set.V.grid.load_chunk))
+    assert (len(field_set.U.grid.load_chunk) == len(field_set.W.grid.load_chunk))
+    assert (len(field_set.U.grid.load_chunk) == 1)
+
+
+@pytest.mark.parametrize('mode', ['jit'])  # Only testing jit as scipy is very slow
+def test_nemo_curvilinear_specific_chunking(mode):
+    dask.config.set({'array.chunk-size': '128MiB'})
+    filenames, variables, dimensions = fieldset_nemo_setup()
+    chs = {'U': {'depthu': 75, 'y': 16, 'x': 16},
+           'V': {'depthv': 75, 'y': 16, 'x': 16},
+           'W': {'depthw': 75, 'y': 16, 'x': 16}}
+
+    field_set = FieldSet.from_nemo(filenames, variables, dimensions, field_chunksize=chs)
+    assert field_set.U.dataFiles is not field_set.W.dataFiles
+    # Now run particles as normal
+    npart = 20
+    lonp = 5.2 * np.ones(npart)
+    latp = [i for i in 52.0+(-1e-3+np.random.rand(npart)*2.0*1e-3)]
+    compute_particle_advection(field_set, mode, lonp, latp)
+    # Nemo sample file dimensions: depthu=75, y=201, x=151
+    assert (len(field_set.U.grid.load_chunk) == len(field_set.V.grid.load_chunk))
+    assert (len(field_set.U.grid.load_chunk) == len(field_set.W.grid.load_chunk))
+    assert (len(field_set.U.grid.load_chunk) == (1 * int(math.ceil(201.0/16.0)) * int(math.ceil(151.0/16.0))))
+
 
 if __name__ == "__main__":
     p = ArgumentParser(description="""Chose the mode using mode option""")
     p.add_argument('--mode', choices=('scipy', 'jit'), nargs='?', default='jit',
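Note that 'auto' chunking only splits an array when dask's configured chunk-size cap is smaller than the array itself, which is why these tests pin the cap before building the FieldSet. The same two lines work in user code; the 2MiB value is simply what the tests use for the small sample fields:

    import dask

    # Cap dask's automatic chunk size. With the sample data, the default of
    # 128MiB would leave each field in a single chunk, making 'auto'
    # indistinguishable from no chunking at all.
    dask.config.set({'array.chunk-size': '2MiB'})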
""" def __init__(self, filename, dimensions, indices, netcdf_engine, timestamp=None, @@ -1697,7 +1699,12 @@ def _get_initial_chunk_dictionary(self): if 'array.chunk-size' in da_conf.config.keys(): chunk_cap = da_utils.parse_bytes(da_conf.config.get('array.chunk-size')) else: - logger.info_once("Unable to locate chunking hints from dask, thus estimating the max. chunk size heuristically. Please consider defining the 'chunk-size' for 'array' in your local dask configuration file (see http://oceanparcels.org/faq.html#field_chunking_config and https://docs.dask.org).") + predefined_cap = da_conf.get('array.chunk-size') + if predefined_cap is not None: + chunk_cap = da_utils.parse_bytes(predefined_cap) + else: + logger.info_once("Unable to locate chunking hints from dask, thus estimating the max. chunk size heuristically. Please consider defining the 'chunk-size' for 'array' in your local dask configuration file (see http://oceanparcels.org/faq.html#field_chunking_config and https://docs.dask.org).") + loni, lonname, lonvalue = self._is_dimension_in_dataset('lon') lati, latname, latvalue = self._is_dimension_in_dataset('lat') if lati >= 0 and loni >= 0: @@ -1712,6 +1719,35 @@ def _get_initial_chunk_dictionary(self): init_chunk_dict[depthname] = max(1, depthvalue) # ==== closing check-opened requested dataset ==== # self.dataset.close() + # ==== check if the chunksize reading is successfull. if not, load the file ONCE really into memory and deduce the chunking from the array dims ==== # + try: + self.dataset = xr.open_dataset(str(self.filename), decode_cf=True, engine=self.netcdf_engine, chunks=init_chunk_dict, lock=False) + except: + # ==== fail - open it as a normal array and deduce the dimensions from the read field ==== # + init_chunk_dict = {} + self.dataset = ncDataset(str(self.filename)) + refdims = self.dataset.dimensions.keys() + max_field = "" + max_dim_names = () + max_overlay_dims = 0 + for vname in self.dataset.variables: + var = self.dataset.variables[vname] + overlay_dims = [] + for vdname in var.dimensions: + if vdname in refdims: + overlay_dims.append(vdname) + n_overlay_dims = len(overlay_dims) + if n_overlay_dims > max_overlay_dims: + max_field = vname + max_dim_names = tuple(overlay_dims) + max_overlay_dims = n_overlay_dims + self.name = max_field + for dname in max_dim_names: + # if dname in self._name_maps['time'] and self.dataset.dimensions[dname].size == 1: + # continue + init_chunk_dict[dname] = min(self._min_dim_chunksize, self.dataset.dimensions[dname].size) + finally: + self.dataset.close() self.dataset = None # ==== self.dataset not available ==== # return init_chunk_dict @@ -1719,7 +1755,7 @@ def _get_initial_chunk_dictionary(self): def _is_dimension_available(self, dimension_name): if self.dimensions is None or self.dataset is None: return False - return (dimension_name in self.dimensions and self.dimensions[dimension_name] in self.dataset.dims) + return dimension_name in self.dimensions def _is_dimension_in_dataset(self, dimension_name): k, dname, dvalue = (-1, '', 0) @@ -1769,7 +1805,7 @@ def _chunksize_to_chunkmap(self): self.chunk_mapping[dim_index] = lonvalue dim_index += 1 elif len(self.field_chunksize) >= 3: - if depthi >= 0: + if depthi >= 0 and self._is_dimension_available('depth'): # self.chunk_mapping[dim_index] = self.field_chunksize[self.dimensions['depth']] self.chunk_mapping[dim_index] = depthvalue dim_index += 1 @@ -1790,10 +1826,12 @@ def _chunkmap_to_chunksize(self): self.field_chunksize[self.dimensions['lon']] = chunk_map[1] elif len(chunk_map) 
             chunk_dim_index = 0
-            if self._is_dimension_available('depth') or (depthi >= 0 and depthvalue > 1):
+            if depthi >= 0 and depthvalue > 1 and self._is_dimension_available('depth'):
                 self.field_chunksize[self.dimensions['depth']] = chunk_map[chunk_dim_index]
                 chunk_dim_index += 1
-            elif self._is_dimension_available('time') or (timei >= 0 and timevalue > 1):
+            elif timei >= 0 and timevalue > 1 and self._is_dimension_available('time'):
                 self.field_chunksize[self.dimensions['time']] = chunk_map[chunk_dim_index]
                 chunk_dim_index += 1
             self.field_chunksize[self.dimensions['lat']] = chunk_map[chunk_dim_index]
@@ -1959,11 +1997,11 @@ def data_access(self):
         chunkIndex = 0
         timei, _, timevalue = self._is_dimension_in_dataset('time')
         depthi, _, depthvalue = self._is_dimension_in_dataset('depth')
-        has_time = timei >= 0 and timevalue > 1
-        has_depth = depthi >= 0 and depthvalue > 1
+        has_time = timei >= 0 and timevalue > 1 and self._is_dimension_available('time')
+        has_depth = depthi >= 0 and depthvalue > 1 and self._is_dimension_available('depth')
 
         startblock = 0
-        startblock += 1 if has_time and not self._is_dimension_available('time') else 0
-        startblock += 1 if has_depth and not self._is_dimension_available('depth') else 0
+        startblock += 1 if has_time else 0
+        startblock += 1 if has_depth else 0
         for chunkDim in data.chunksize[startblock:]:
             self.chunk_mapping[chunkIndex] = chunkDim
             chunkIndex += 1
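The new fallback in _get_initial_chunk_dictionary() re-opens the file with netCDF4 when xarray rejects the seeded chunk dict, picks the variable spanning the most file dimensions, and chunks each of those dimensions at min(16, size). A standalone sketch of that deduction, outside the class; the function name is hypothetical and the logic is mirrored from the hunk above:

    from netCDF4 import Dataset

    def deduce_chunk_dict(path, min_chunk=16):
        """Chunk dict seeded from the variable spanning the most file dimensions."""
        ds = Dataset(path)
        try:
            # the variable overlapping the most file dimensions is taken as the data field
            best = max(ds.variables.values(),
                       key=lambda v: sum(d in ds.dimensions for d in v.dimensions))
            return {d: min(min_chunk, ds.dimensions[d].size)
                    for d in best.dimensions if d in ds.dimensions}
        finally:
            ds.close()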
diff --git a/parcels/fieldset.py b/parcels/fieldset.py
index 315085ff1d..d800f435df 100644
--- a/parcels/fieldset.py
+++ b/parcels/fieldset.py
@@ -206,7 +206,8 @@ def parse_wildcards(cls, paths, filenames, var):
 
     @classmethod
     def from_netcdf(cls, filenames, variables, dimensions, indices=None, fieldtype=None,
-                    mesh='spherical', timestamps=None, allow_time_extrapolation=None, time_periodic=False, deferred_load=True, **kwargs):
+                    mesh='spherical', timestamps=None, allow_time_extrapolation=None, time_periodic=False,
+                    deferred_load=True, field_chunksize='auto', **kwargs):
         """Initialises FieldSet object from NetCDF files
 
         :param filenames: Dictionary mapping variables to file(s). The
@@ -274,6 +275,7 @@ def from_netcdf(cls, filenames, variables, dimensions, indices=None, fieldtype=N
             cls.checkvaliddimensionsdict(dims)
             inds = indices[var] if (indices and var in indices) else indices
             fieldtype = fieldtype[var] if (fieldtype and var in fieldtype) else fieldtype
+            chunksize = field_chunksize[var] if (field_chunksize and var in field_chunksize) else field_chunksize
             grid = None
             # check if grid has already been processed (i.e. if other fields have same filenames, dimensions and indices)
@@ -298,7 +300,8 @@ def from_netcdf(cls, filenames, variables, dimensions, indices=None, fieldtype=N
             fields[var] = Field.from_netcdf(paths, (var, name), dims, inds, grid=grid, mesh=mesh, timestamps=timestamps,
                                             allow_time_extrapolation=allow_time_extrapolation,
                                             time_periodic=time_periodic, deferred_load=deferred_load,
-                                            fieldtype=fieldtype, **kwargs)
+                                            fieldtype=fieldtype, field_chunksize=chunksize, **kwargs)
+
         u = fields.pop('U', None)
         v = fields.pop('V', None)
         return cls(u, v, fields=fields)
@@ -306,7 +309,7 @@ def from_netcdf(cls, filenames, variables, dimensions, indices=None, fieldtype=N
     @classmethod
     def from_nemo(cls, filenames, variables, dimensions, indices=None, mesh='spherical',
                   allow_time_extrapolation=None, time_periodic=False,
-                  tracer_interp_method='cgrid_tracer', **kwargs):
+                  tracer_interp_method='cgrid_tracer', field_chunksize='auto', **kwargs):
         """Initialises FieldSet object from NetCDF files of Curvilinear NEMO fields.
 
         :param filenames: Dictionary mapping variables to file(s). The
@@ -360,7 +363,8 @@ def from_nemo(cls, filenames, variables, dimensions, indices=None, mesh='spheric
         if 'creation_log' not in kwargs.keys():
             kwargs['creation_log'] = 'from_nemo'
         fieldset = cls.from_c_grid_dataset(filenames, variables, dimensions, mesh=mesh, indices=indices, time_periodic=time_periodic,
-                                           allow_time_extrapolation=allow_time_extrapolation, tracer_interp_method=tracer_interp_method, **kwargs)
+                                           allow_time_extrapolation=allow_time_extrapolation, tracer_interp_method=tracer_interp_method,
+                                           field_chunksize=field_chunksize, **kwargs)
         if hasattr(fieldset, 'W'):
             fieldset.W.set_scaling_factor(-1.)
         return fieldset
@@ -368,7 +372,7 @@ def from_nemo(cls, filenames, variables, dimensions, indices=None, mesh='spheric
     @classmethod
     def from_c_grid_dataset(cls, filenames, variables, dimensions, indices=None, mesh='spherical',
                             allow_time_extrapolation=None, time_periodic=False,
-                            tracer_interp_method='cgrid_tracer', **kwargs):
+                            tracer_interp_method='cgrid_tracer', field_chunksize='auto', **kwargs):
         """Initialises FieldSet object from NetCDF files of Curvilinear NEMO fields.
 
         :param filenames: Dictionary mapping variables to file(s). The
@@ -434,12 +438,13 @@ def from_c_grid_dataset(cls, filenames, variables, dimensions, indices=None, mes
             kwargs['creation_log'] = 'from_c_grid_dataset'
 
         return cls.from_netcdf(filenames, variables, dimensions, mesh=mesh, indices=indices, time_periodic=time_periodic,
-                               allow_time_extrapolation=allow_time_extrapolation, interp_method=interp_method, **kwargs)
+                               allow_time_extrapolation=allow_time_extrapolation, interp_method=interp_method,
+                               field_chunksize=field_chunksize, **kwargs)
 
     @classmethod
     def from_pop(cls, filenames, variables, dimensions, indices=None, mesh='spherical',
                  allow_time_extrapolation=None, time_periodic=False,
-                 tracer_interp_method='bgrid_tracer', **kwargs):
+                 tracer_interp_method='bgrid_tracer', field_chunksize='auto', **kwargs):
         """Initialises FieldSet object from NetCDF files of POP fields.
            It is assumed that the velocities in the POP fields is in cm/s.
@@ -495,7 +500,8 @@ def from_pop(cls, filenames, variables, dimensions, indices=None, mesh='spherica
         if 'creation_log' not in kwargs.keys():
             kwargs['creation_log'] = 'from_pop'
         fieldset = cls.from_b_grid_dataset(filenames, variables, dimensions, mesh=mesh, indices=indices, time_periodic=time_periodic,
-                                           allow_time_extrapolation=allow_time_extrapolation, tracer_interp_method=tracer_interp_method, **kwargs)
+                                           allow_time_extrapolation=allow_time_extrapolation, tracer_interp_method=tracer_interp_method,
+                                           field_chunksize=field_chunksize, **kwargs)
         if hasattr(fieldset, 'U'):
             fieldset.U.set_scaling_factor(0.01)  # cm/s to m/s
         if hasattr(fieldset, 'V'):
@@ -507,7 +513,7 @@ def from_pop(cls, filenames, variables, dimensions, indices=None, mesh='spherica
     @classmethod
     def from_b_grid_dataset(cls, filenames, variables, dimensions, indices=None, mesh='spherical',
                             allow_time_extrapolation=None, time_periodic=False,
-                            tracer_interp_method='bgrid_tracer', **kwargs):
+                            tracer_interp_method='bgrid_tracer', field_chunksize='auto', **kwargs):
         """Initialises FieldSet object from NetCDF files of Bgrid fields.
 
         :param filenames: Dictionary mapping variables to file(s). The
@@ -575,11 +581,13 @@ def from_b_grid_dataset(cls, filenames, variables, dimensions, indices=None, mes
             kwargs['creation_log'] = 'from_b_grid_dataset'
 
         return cls.from_netcdf(filenames, variables, dimensions, mesh=mesh, indices=indices, time_periodic=time_periodic,
-                               allow_time_extrapolation=allow_time_extrapolation, interp_method=interp_method, **kwargs)
+                               allow_time_extrapolation=allow_time_extrapolation, interp_method=interp_method,
+                               field_chunksize=field_chunksize, **kwargs)
 
     @classmethod
     def from_parcels(cls, basename, uvar='vozocrtx', vvar='vomecrty', indices=None, extra_fields=None,
-                     allow_time_extrapolation=None, time_periodic=False, deferred_load=True, **kwargs):
+                     allow_time_extrapolation=None, time_periodic=False, deferred_load=True,
+                     field_chunksize='auto', **kwargs):
         """Initialises FieldSet data from NetCDF files using the Parcels FieldSet.write() conventions.
 
         :param basename: Base name of the file(s); may contain
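One behavioural detail of the per-variable resolution added in from_netcdf above: when field_chunksize is a dict that lacks an entry for some variable, that variable receives the whole dict rather than a default. An illustration of the exact expression used in the hunk:

    field_chunksize = {'U': {'y': 16, 'x': 16}}
    for var in ('U', 'V'):
        chunksize = field_chunksize[var] if (field_chunksize and var in field_chunksize) else field_chunksize
        print(var, chunksize)
    # U {'y': 16, 'x': 16}         -> U gets its own sub-dict
    # V {'U': {'y': 16, 'x': 16}}  -> V falls through to the full dict, so
    #                                 callers should supply an entry per field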
@@ -618,7 +626,8 @@ def from_parcels(cls, basename, uvar='vozocrtx', vvar='vomecrty', indices=None,
                            for v in extra_fields.keys()])
         return cls.from_netcdf(filenames, indices=indices, variables=extra_fields,
                                dimensions=dimensions, allow_time_extrapolation=allow_time_extrapolation,
-                               time_periodic=time_periodic, deferred_load=deferred_load, **kwargs)
+                               time_periodic=time_periodic, deferred_load=deferred_load,
+                               field_chunksize=field_chunksize, **kwargs)
 
     @classmethod
     def from_xarray_dataset(cls, ds, variables, dimensions, mesh='spherical', allow_time_extrapolation=None,
diff --git a/parcels/kernel.py b/parcels/kernel.py
index 2b9f5ef698..f673d7a6a8 100644
--- a/parcels/kernel.py
+++ b/parcels/kernel.py
@@ -273,6 +273,7 @@ def execute_python(self, pset, endtime, dt):
             # Don't execute particles that aren't started yet
             sign_end_part = np.sign(endtime - p.time)
+            # Compute min/max dt for first timestep
             dt_pos = min(abs(p.dt), abs(endtime - p.time))
 
             # ==== numerically stable; also making sure that continuously-recovered particles do end successfully,
@@ -352,8 +353,7 @@ def execute(self, pset, endtime, dt, recovery=None, output_file=None):
 
         def remove_deleted(pset):
             """Utility to remove all particles that signalled deletion"""
-            indices = [i for i, p in enumerate(pset.particles)
-                       if p.state in [ErrorCode.Delete]]
+            indices = [i for i, p in enumerate(pset.particles) if p.state in [ErrorCode.Delete]]
             if len(indices) > 0 and output_file is not None:
                 output_file.write(pset[indices], endtime, deleted_only=True)
             pset.remove(indices)
diff --git a/parcels/particle.py b/parcels/particle.py
index c30e42455d..ff093b13ba 100644
--- a/parcels/particle.py
+++ b/parcels/particle.py
@@ -7,9 +7,10 @@
 from parcels.tools.error import ErrorCode
 from parcels.tools.loggers import logger
 
-
 __all__ = ['ScipyParticle', 'JITParticle', 'Variable']
 
+indicators_64bit = [np.float64, np.int64, c_void_p]
+
 
 class Variable(object):
     """Descriptor class that delegates data access to particle data
@@ -47,8 +48,7 @@ def __repr__(self):
 
     def is64bit(self):
         """Check whether variable is 64-bit"""
-        return True if self.dtype == np.float64 or self.dtype == np.int64 \
-            or self.dtype == c_void_p else False
+        return self.dtype in indicators_64bit
diff --git a/parcels/scripts/get_examples.py b/parcels/scripts/get_examples.py
index de6437747f..6ead0aeb2e 100644
--- a/parcels/scripts/get_examples.py
+++ b/parcels/scripts/get_examples.py
@@ -37,9 +37,9 @@
           "U_purely_zonal-ORCA025_grid_U.nc4", "V_purely_zonal-ORCA025_grid_V.nc4",
           "mesh_mask.nc4"]]
       + ["NemoNorthSeaORCA025-N006_data/" + fn for fn in [
-          "ORCA025-N06_20000104d05U.nc", "ORCA025-N06_20000109d05U.nc",
-          "ORCA025-N06_20000104d05V.nc", "ORCA025-N06_20000109d05V.nc",
-          "ORCA025-N06_20000104d05W.nc", "ORCA025-N06_20000109d05W.nc",
+          "ORCA025-N06_20000104d05U.nc", "ORCA025-N06_20000109d05U.nc", "ORCA025-N06_20000114d05U.nc", "ORCA025-N06_20000119d05U.nc", "ORCA025-N06_20000124d05U.nc", "ORCA025-N06_20000129d05U.nc",
+          "ORCA025-N06_20000104d05V.nc", "ORCA025-N06_20000109d05V.nc", "ORCA025-N06_20000114d05V.nc", "ORCA025-N06_20000119d05V.nc", "ORCA025-N06_20000124d05V.nc", "ORCA025-N06_20000129d05V.nc",
+          "ORCA025-N06_20000104d05W.nc", "ORCA025-N06_20000109d05W.nc", "ORCA025-N06_20000114d05W.nc", "ORCA025-N06_20000119d05W.nc", "ORCA025-N06_20000124d05W.nc", "ORCA025-N06_20000129d05W.nc",
           "coordinates.nc"]]
       + ["WOA_data/" + fn for fn in ["woa18_decav_t%.2d_04.nc" % m for m in range(1, 13)]])
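For reviewers checking the load_chunk asserts by hand: the expected chunk count is the product of per-dimension ceiling divisions. A small helper reproducing the arithmetic in the tests (the helper itself is hypothetical, not part of the patch):

    import math

    def expected_chunks(dim_sizes, chunk_sizes):
        """Number of dask chunks for the given dimension sizes and chunk sizes."""
        n = 1
        for size, chunk in zip(dim_sizes, chunk_sizes):
            n *= math.ceil(size / chunk)
        return n

    # NEMO sample grid from the tests: depth=75, y=201, x=151, chunked as (75, 16, 16)
    assert expected_chunks((75, 201, 151), (75, 16, 16)) == 1 * 13 * 10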