Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
64 commits
Select commit Hold shift + click to select a range
e92b784
#770 - initial fix-commit for the field sets. Continue running tests …
CKehl Mar 19, 2020
ff761ea
Merge branch 'master' into stabilize_lazy_evaluation_NEMOfields
CKehl Mar 19, 2020
124525c
#774 - added the field-individual setup of the field chunking. Test t…
CKehl Mar 20, 2020
15cad82
Cleaning `field_chunksize` dict code
erikvansebille Mar 20, 2020
f24e639
Fixing flake8 errors
erikvansebille Mar 20, 2020
5eb2304
#774 - local update of MEDUSA
CKehl Mar 23, 2020
4234122
Merge branch 'stabilize_lazy_evaluation_NEMOfields' of github.com:Oce…
CKehl Mar 23, 2020
ec2e1c4
#774 - fixed error in field.py line 1741, trying to compare a number …
CKehl Mar 23, 2020
31614d2
#774 - added chunking tests
CKehl Mar 23, 2020
618e156
#774 - resolved the pytest fixture problem. Now getting OutOfBounds e…
CKehl Mar 23, 2020
20341ad
#774 - fixed test function naming in example_nemo_curvilinear.py
CKehl Mar 23, 2020
ec2a0e5
#774 - tests are breaking cause NetCDF attempts to open and chunk eac…
CKehl Mar 23, 2020
b63fc0d
#774 - returning to former 'test_nemo_3D_samegrid()' test function in…
CKehl Mar 24, 2020
7608a88
#774 - fixed the extents and spawned particles for the NEMO tests
CKehl Mar 24, 2020
1e1de53
#774 - added check-out printouts (commented) to track the defined fie…
CKehl Mar 24, 2020
b9872f0
#774 - fixed bug in FieldSet::from_netcdf that has overwritten the us…
CKehl Mar 24, 2020
a1189e0
#774 - adapted the tests to correctly check the requested chunk sizes
CKehl Mar 24, 2020
fa966b8
#774 - avoided excessive INFO prints for 'Unable to locate chunking h…
CKehl Mar 24, 2020
4f34e4f
#774 - added printouts to track non-auto initialisations for the pala…
CKehl Mar 24, 2020
67c9ff3
#774 - auto-test to only apply chunking to depth or time if they are …
CKehl Mar 24, 2020
d8e3e0e
#774 - commented debug print-outs
CKehl Mar 24, 2020
28b7f1e
#774 - adapted the script/get_examples.py to load more timesteps for …
CKehl Mar 24, 2020
0169424
#774 - fixed the infinite-loop out-of-bounds bug with specified chunk…
CKehl Mar 25, 2020
0a5d8ec
initial commit. This branch is a clone of #774, which suffers from a …
CKehl Mar 26, 2020
a68cb7b
Merge branch 'stabilize_lazy_evaluation_NEMOfields' into stabilize_da…
CKehl Mar 26, 2020
08cd0cd
#776 - fixed a typo occurring from the merge in kernel.py, l. 274
CKehl Mar 26, 2020
73d558e
#776 - adding POP tests (tests locally succeed). New data download ad…
CKehl Mar 26, 2020
7450ad6
#776 -fixed PEP8 linter issues and removed unnecessary comments
CKehl Mar 26, 2020
ee7b3d4
#776 - included changes requested from the review
CKehl Mar 27, 2020
a14cbd2
#776 - fixed the PEP8 linter, which occurred from removing large code…
CKehl Mar 27, 2020
6279deb
#776 - returned to having an isotropic chunk size for the NEMO fields…
CKehl Mar 27, 2020
974ef1f
#776 - fix the issue of attempting to initialize shared-grid fields w…
CKehl Mar 27, 2020
7f11440
Update parcels/examples/example_dask_chunk_OCMs.py
CKehl Mar 30, 2020
536dabe
#776 - applied requested changes from the code review. Also, for debu…
CKehl Mar 30, 2020
a903692
Merge branch 'stabilize_dask_field_loading' of github.com:OceanParcel…
CKehl Mar 30, 2020
9ae93ae
#776 adapted the output message for tracking sameGrid function
CKehl Mar 30, 2020
321f82e
#776 - fixing PEP8 linter unconformities
CKehl Mar 30, 2020
9cb88a9
#776 - still not getting the expected shared-grid output. Modified th…
CKehl Mar 30, 2020
9e0eee8
#776 - patch for handling heterogeneous chunk sizes in shared-grid en…
CKehl Mar 31, 2020
09f440d
#776 - changed name of 'sameGrid' variable in FieldSet::from_netcdf()…
CKehl Mar 31, 2020
0491e05
#776 - moved the break-point for searching for the fitting shared gri…
CKehl Mar 31, 2020
0a5a3d1
#776 - fixes incorrectly caching dataFiles paths.
CKehl Mar 31, 2020
5929c88
#776 - fixes to include dimensions in the chunking that are in the fi…
CKehl Mar 31, 2020
3b608f2
#776 - fixing PEP8 linter errors.
CKehl Mar 31, 2020
3dfd6a7
#776 - adapted the NetCDFFileReader to re-format the chunksize if bei…
CKehl Mar 31, 2020
ab10cc4
#776 - adapted the NetCDFFileReader to re-format the chunksize if bei…
CKehl Mar 31, 2020
ab93e24
Adding unit test for sampling 2D fields
erikvansebille Apr 1, 2020
48f3b9b
Adding rectilinear field to 2D sampling test
erikvansebille Apr 1, 2020
3cb9870
Adding line back to set nav_lon to np.ones
erikvansebille Apr 1, 2020
03bedfc
#776 - minor tested and confirmed patches to make palaeo-NEMO case ru…
CKehl Apr 1, 2020
b582b56
#776 - fixing flake8 linter errors again
CKehl Apr 1, 2020
2b51135
Setting correct dtype for nav_lon.data
erikvansebille Apr 1, 2020
47c7fba
#776 - minor tested and confirmed patches to make palaeo-NEMO case ru…
CKehl Apr 1, 2020
f50bb03
Merge branch 'stabilize_dask_field_loading' of github.com:OceanParcel…
CKehl Apr 1, 2020
2cb3e9c
#776 last fixes to only apply chunking to file dimensions that are ac…
CKehl Apr 1, 2020
113fe17
#776 - disabled breaking example causing #782 - warning message to re…
CKehl Apr 1, 2020
46a16ad
#776 fix conversion from dask-internal nchunk conversion to the chunk…
CKehl Apr 1, 2020
f8b6e7c
fixed PEP8 and final adaptations so that chunks and addresses match t…
CKehl Apr 1, 2020
fed13b6
#776 - removed unnecessary comments and print instructions
CKehl Apr 1, 2020
a09df9a
Merge branch 'master' into stabilize_dask_field_loading
erikvansebille Apr 2, 2020
74c9143
updating test_3d_2dfield_sampling after merging #782
erikvansebille Apr 2, 2020
95b866d
#776 - added the possible merger of (chunked) grids via FieldSet.add_…
CKehl Apr 2, 2020
1e18c90
#776 - fixing PEP8 linter errors
CKehl Apr 2, 2020
cf21843
Merge branch 'master' into stabilize_dask_field_loading
erikvansebille Apr 3, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/ci-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ jobs:
./.github/scripts/linux_install.sh
./.github/scripts/osx_test.sh
shell: bash
timeout-minutes: 45
test_linux:
name: Testing on Linux
runs-on: ubuntu-latest
Expand All @@ -36,6 +37,7 @@ jobs:
python -m flake8 parcels/
python -m flake8 tests/
shell: bash
timeout-minutes: 45
test_windows:
name: Testing on Windows
runs-on: windows-2016
Expand All @@ -52,4 +54,5 @@ jobs:
./.github/scripts/windows_install.bat
./.github/scripts/windows_test.bat
shell: powershell
timeout-minutes: 45

415 changes: 415 additions & 0 deletions parcels/examples/example_dask_chunk_OCMs.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion parcels/examples/example_nemo_curvilinear.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def run_nemo_curvilinear(mode, outfile):
'data': data_path + 'V_purely_zonal-ORCA025_grid_V.nc4'}}
variables = {'U': 'U', 'V': 'V'}
dimensions = {'lon': 'glamf', 'lat': 'gphif'}
field_chunksize = {'lon': 2, 'lat': 2}
field_chunksize = {'y': 2, 'x': 2}
field_set = FieldSet.from_nemo(filenames, variables, dimensions, field_chunksize=field_chunksize)
assert field_set.U.field_chunksize == field_chunksize

Expand Down
178 changes: 133 additions & 45 deletions parcels/field.py

Large diffs are not rendered by default.

129 changes: 109 additions & 20 deletions parcels/fieldset.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions parcels/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def __init__(self, lon, lat, time, time_origin, mesh):
self.periods = 0
self.load_chunk = []
self.chunk_info = None
self.master_chunksize = None
self._add_last_periodic_data_timestep = False

@staticmethod
Expand Down
36 changes: 32 additions & 4 deletions parcels/gridset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import numpy as np
import functools
from parcels.tools.loggers import logger

__all__ = ['GridSet']

Expand All @@ -15,20 +17,46 @@ def add_grid(self, field):
grid = field.grid
existing_grid = False
for g in self.grids:
if field.field_chunksize != grid.master_chunksize:
logger.warning_once("Field chunksize and Grid master chunksize are not equal - erroneous behaviour expected.")
break
if g == grid:
existing_grid = True
break
sameGrid = True
sameDims = True
if grid.time_origin != g.time_origin:
sameDims = False
continue
for attr in ['lon', 'lat', 'depth', 'time']:
gattr = getattr(g, attr)
gridattr = getattr(grid, attr)
if gattr.shape != gridattr.shape or not np.allclose(gattr, gridattr):
sameGrid = False
sameDims = False
break
if not sameGrid:
if not sameDims:
continue
existing_grid = True
field.grid = g
break
sameGrid &= (grid.master_chunksize == g.master_chunksize) or (grid.master_chunksize in [False, None] and g.master_chunksize in [False, None])
if not sameGrid and sameDims and grid.master_chunksize is not None:
print(field.field_chunksize)
print(grid.master_chunksize)
print(g.master_chunksize)
res = False
if (isinstance(grid.master_chunksize, tuple) and isinstance(g.master_chunksize, tuple)) or \
(isinstance(grid.master_chunksize, dict) and isinstance(g.master_chunksize, dict)):
res |= functools.reduce(lambda i, j: i and j,
map(lambda m, k: m == k, grid.master_chunksize, g.master_chunksize), True)
if res:
sameGrid = True
logger.warning_once("Trying to initialize a shared grid with different chunking sizes - action prohibited. Replacing requested field_chunksize with grid's master chunksize.")
else:
raise ValueError("Conflict between grids of the same gridset: major grid chunksize and requested sibling-grid chunksize as well as their chunk-dimension names are not equal - Please apply the same chunksize to all fields in a shared grid!")
break
if sameGrid:
existing_grid = True
field.grid = g
break

if not existing_grid:
self.grids.append(grid)
Expand Down
2 changes: 1 addition & 1 deletion parcels/kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def execute_python(self, pset, endtime, dt):

# Don't execute particles that aren't started yet
sign_end_part = np.sign(endtime - particles.time)

# Compute min/max dt for first timestep
dt_pos = min(abs(particles.dt), abs(endtime - particles.time))

# ==== numerically stable; also making sure that continuously-recovered particles do end successfully,
Expand Down
6 changes: 3 additions & 3 deletions parcels/particle.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
from parcels.tools.error import ErrorCode
from parcels.tools.loggers import logger


__all__ = ['ScipyParticle', 'JITParticle', 'Variable']

indicators_64bit = [np.float64, np.int64, c_void_p]


class Variable(object):
"""Descriptor class that delegates data access to particle data
Expand Down Expand Up @@ -47,8 +48,7 @@ def __repr__(self):

def is64bit(self):
"""Check whether variable is 64-bit"""
return True if self.dtype == np.float64 or self.dtype == np.int64 \
or self.dtype == c_void_p else False
return True if self.dtype in indicators_64bit else False


class ParticleType(object):
Expand Down
9 changes: 6 additions & 3 deletions parcels/scripts/get_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,13 @@
"U_purely_zonal-ORCA025_grid_U.nc4", "V_purely_zonal-ORCA025_grid_V.nc4",
"mesh_mask.nc4"]]
+ ["NemoNorthSeaORCA025-N006_data/" + fn for fn in [
"ORCA025-N06_20000104d05U.nc", "ORCA025-N06_20000109d05U.nc",
"ORCA025-N06_20000104d05V.nc", "ORCA025-N06_20000109d05V.nc",
"ORCA025-N06_20000104d05W.nc", "ORCA025-N06_20000109d05W.nc",
"ORCA025-N06_20000104d05U.nc", "ORCA025-N06_20000109d05U.nc", "ORCA025-N06_20000114d05U.nc", "ORCA025-N06_20000119d05U.nc", "ORCA025-N06_20000124d05U.nc", "ORCA025-N06_20000129d05U.nc",
"ORCA025-N06_20000104d05V.nc", "ORCA025-N06_20000109d05V.nc", "ORCA025-N06_20000114d05V.nc", "ORCA025-N06_20000119d05V.nc", "ORCA025-N06_20000124d05V.nc", "ORCA025-N06_20000129d05V.nc",
"ORCA025-N06_20000104d05W.nc", "ORCA025-N06_20000109d05W.nc", "ORCA025-N06_20000114d05W.nc", "ORCA025-N06_20000119d05W.nc", "ORCA025-N06_20000124d05W.nc", "ORCA025-N06_20000129d05W.nc",
"coordinates.nc"]]
+ ["POPSouthernOcean_data/" + fn for fn in ["t.x1_SAMOC_flux.169000.nc", "t.x1_SAMOC_flux.169001.nc",
"t.x1_SAMOC_flux.169002.nc", "t.x1_SAMOC_flux.169003.nc",
"t.x1_SAMOC_flux.169004.nc", "t.x1_SAMOC_flux.169005.nc"]]
+ ["WOA_data/" + fn for fn in ["woa18_decav_t%.2d_04.nc" % m
for m in range(1, 13)]])

Expand Down
92 changes: 91 additions & 1 deletion tests/test_fieldset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from parcels.field import Field, VectorField
from parcels.tools.converters import TimeConverter, _get_cftime_calendars, _get_cftime_datetimes, UnitConverter, GeographicPolar
import dask.array as da
import dask
from datetime import timedelta as delta
import datetime
import numpy as np
Expand Down Expand Up @@ -239,6 +240,90 @@ def test_add_duplicate_field(dupobject):
assert error_thrown


def test_fieldset_samegrids_from_file(tmpdir, filename='test_subsets'):
    """Check that fields read from the same NetCDF files share one grid and chunksize."""
    # Write a small fieldset to disk so it can be re-read through from_netcdf
    data, dimensions = generate_fieldset(100, 100)
    filepath1 = tmpdir.join(filename+'_1')
    fieldset1 = FieldSet.from_data(data, dimensions)
    fieldset1.write(filepath1)

    # U and V point at the *same* files, repeated over 4 synthetic daily timestamps
    ufiles = [filepath1+'U.nc', ] * 4
    vfiles = [filepath1+'V.nc', ] * 4
    timestamps = np.arange(0, 4, 1) * 86400.0
    timestamps = np.expand_dims(timestamps, 1)
    files = {'U': ufiles, 'V': vfiles}
    variables = {'U': 'vozocrtx', 'V': 'vomecrty'}
    dimensions = {'lon': 'nav_lon', 'lat': 'nav_lat'}
    chs = 'auto'
    fieldset = FieldSet.from_netcdf(files, variables, dimensions, timestamps=timestamps, allow_time_extrapolation=True, field_chunksize=chs)

    # Identical source files should be deduplicated to a single shared grid,
    # with matching master chunksize on the grid and field_chunksize on the fields
    assert fieldset.gridset.size == 1
    assert fieldset.U.grid == fieldset.V.grid
    assert fieldset.U.grid.master_chunksize == fieldset.V.grid.master_chunksize
    assert fieldset.U.field_chunksize == fieldset.V.field_chunksize


def test_fieldset_diffgrids_from_file(tmpdir, filename='test_subsets'):
    """Check that fields read from files with different resolutions get separate grids."""
    # Write two fieldsets of different resolution (100x100 vs 50x50) to disk
    data, dimensions = generate_fieldset(100, 100)
    filepath1 = tmpdir.join(filename+'_1')
    fieldset1 = FieldSet.from_data(data, dimensions)
    fieldset1.write(filepath1)
    data, dimensions = generate_fieldset(50, 50)
    filepath2 = tmpdir.join(filename + '_2')
    fieldset2 = FieldSet.from_data(data, dimensions)
    fieldset2.write(filepath2)

    # U comes from the high-resolution files, V from the low-resolution ones
    ufiles = [filepath1+'U.nc', ] * 4
    vfiles = [filepath2+'V.nc', ] * 4
    timestamps = np.arange(0, 4, 1) * 86400.0
    timestamps = np.expand_dims(timestamps, 1)
    files = {'U': ufiles, 'V': vfiles}
    variables = {'U': 'vozocrtx', 'V': 'vomecrty'}
    dimensions = {'lon': 'nav_lon', 'lat': 'nav_lat'}
    chs = 'auto'

    # Differing source grids must NOT be merged into one shared grid
    fieldset = FieldSet.from_netcdf(files, variables, dimensions, timestamps=timestamps, allow_time_extrapolation=True, field_chunksize=chs)
    assert fieldset.gridset.size == 2
    assert fieldset.U.grid != fieldset.V.grid


def test_fieldset_diffgrids_from_file_data(tmpdir, filename='test_subsets'):
    """Check that a field added from in-memory data keeps a grid separate from file-based fields."""
    # Build an in-memory fieldset, write it to disk, and keep one Field ('B') around
    data, dimensions = generate_fieldset(100, 100)
    filepath = tmpdir.join(filename)
    fieldset_data = FieldSet.from_data(data, dimensions)
    fieldset_data.write(filepath)
    field_data = fieldset_data.U
    field_data.name = "B"

    # Re-read U and V from the written files over 4 synthetic daily timestamps
    ufiles = [filepath+'U.nc', ] * 4
    vfiles = [filepath+'V.nc', ] * 4
    timestamps = np.arange(0, 4, 1) * 86400.0
    timestamps = np.expand_dims(timestamps, 1)
    files = {'U': ufiles, 'V': vfiles}
    variables = {'U': 'vozocrtx', 'V': 'vomecrty'}
    dimensions = {'lon': 'nav_lon', 'lat': 'nav_lat'}
    chs = 'auto'
    fieldset_file = FieldSet.from_netcdf(files, variables, dimensions, timestamps=timestamps, allow_time_extrapolation=True, field_chunksize=chs)

    # The data-backed field 'B' uses a different grid object than the file-backed fields
    fieldset_file.add_field(field_data, "B")
    assert len(fieldset_file.get_fields()) == 3
    assert fieldset_file.gridset.size == 2
    assert fieldset_file.U.grid != fieldset_file.B.grid


def test_fieldset_samegrids_from_data(tmpdir, filename='test_subsets'):
    """Check that adding a field built from the same in-memory data reuses the existing grid."""
    data, dimensions = generate_fieldset(100, 100)
    fieldset1 = FieldSet.from_data(data, dimensions)
    # 'B' is literally fieldset1.U under another name, so its grid must be shared
    field_data = fieldset1.U
    field_data.name = "B"
    fieldset1.add_field(field_data, "B")
    assert fieldset1.gridset.size == 1
    assert fieldset1.U.grid == fieldset1.B.grid


@pytest.mark.parametrize('mesh', ['flat', 'spherical'])
def test_fieldset_celledgesizes(mesh):
data, dimensions = generate_fieldset(10, 7)
Expand Down Expand Up @@ -354,6 +439,11 @@ def test_vector_fields(mode, swapUV):
@pytest.mark.parametrize('field_chunksize', [False, 'auto', (1, 32, 32)])
@pytest.mark.parametrize('with_GC', [False, True])
def test_from_netcdf_memory_containment(mode, time_periodic, field_chunksize, with_GC):
if field_chunksize == 'auto':
dask.config.set({'array.chunk-size': '2MiB'})
else:
dask.config.set({'array.chunk-size': '128MiB'})

class PerformanceLog():
samples = []
memory_steps = []
Expand Down Expand Up @@ -432,7 +522,7 @@ def test_from_netcdf_field_chunking(mode, time_periodic, field_chunksize, deferL


@pytest.mark.parametrize('datetype', ['float', 'datetime64'])
def test_timestaps(datetype, tmpdir):
def test_timestamps(datetype, tmpdir):
data1, dims1 = generate_fieldset(10, 10, 1, 10)
data2, dims2 = generate_fieldset(10, 10, 1, 4)
if datetype == 'float':
Expand Down