Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
64 commits
Select commit Hold shift + click to select a range
e92b784
#770 - initial fix-commit for the field sets. Continue running tests …
CKehl Mar 19, 2020
ff761ea
Merge branch 'master' into stabilize_lazy_evaluation_NEMOfields
CKehl Mar 19, 2020
124525c
#774 - added the field-individual setup of the field chunking. Test t…
CKehl Mar 20, 2020
15cad82
Cleaning `field_chunksize` dict code
erikvansebille Mar 20, 2020
f24e639
Fixing flake8 errors
erikvansebille Mar 20, 2020
5eb2304
#774 - local update of MEDUSA
CKehl Mar 23, 2020
4234122
Merge branch 'stabilize_lazy_evaluation_NEMOfields' of github.com:Oce…
CKehl Mar 23, 2020
ec2e1c4
#774 - fixed error in field.py line 1741, trying to compare a number …
CKehl Mar 23, 2020
31614d2
#774 - added chunking tests
CKehl Mar 23, 2020
618e156
#774 - resolved the pytest fixture problem. Now getting OutOfBounds e…
CKehl Mar 23, 2020
20341ad
#774 - fixed test function naming in example_nemo_curvilinear.py
CKehl Mar 23, 2020
ec2a0e5
#774 - tests are breaking cause NetCDF attempts to open and chunk eac…
CKehl Mar 23, 2020
b63fc0d
#774 - returning to former 'test_nemo_3D_samegrid()' test function in…
CKehl Mar 24, 2020
7608a88
#774 - fixed the extents and spawned particles for the NEMO tests
CKehl Mar 24, 2020
1e1de53
#774 - added check-out printouts (commented) to track the defined fie…
CKehl Mar 24, 2020
b9872f0
#774 - fixed bug in FieldSet::from_netcdf that has overwritten the us…
CKehl Mar 24, 2020
a1189e0
#774 - adapted the tests to correctly check the requested chunk sizes
CKehl Mar 24, 2020
fa966b8
#774 - avoided excessive INFO prints for 'Unable to locate chunking h…
CKehl Mar 24, 2020
4f34e4f
#774 - added printouts to track non-auto initialisations for the pala…
CKehl Mar 24, 2020
67c9ff3
#774 - auto-test to only apply chunking to depth or time if they are …
CKehl Mar 24, 2020
d8e3e0e
#774 - commented debug print-outs
CKehl Mar 24, 2020
28b7f1e
#774 - adapted the script/get_examples.py to load more timesteps for …
CKehl Mar 24, 2020
0169424
#774 - fixed the infinite-loop out-of-bounds bug with specified chunk…
CKehl Mar 25, 2020
0a5d8ec
initial commit. This branch is a clone of #774, which suffers from a …
CKehl Mar 26, 2020
a68cb7b
Merge branch 'stabilize_lazy_evaluation_NEMOfields' into stabilize_da…
CKehl Mar 26, 2020
08cd0cd
#776 - fixed a typo occurring from the merge in kernel.py, l. 274
CKehl Mar 26, 2020
73d558e
#776 - adding POP tests (tests locally succeed). New data download ad…
CKehl Mar 26, 2020
7450ad6
#776 -fixed PEP8 linter issues and removed unnecessary comments
CKehl Mar 26, 2020
ee7b3d4
#776 - included changes requested from the review
CKehl Mar 27, 2020
a14cbd2
#776 - fixed the PEP8 linter, which occurred from removing large code…
CKehl Mar 27, 2020
6279deb
#776 - returned to having an isotropic chunk size for the NEMO fields…
CKehl Mar 27, 2020
974ef1f
#776 - fix the issue of attempting to initialize shared-grid fields w…
CKehl Mar 27, 2020
7f11440
Update parcels/examples/example_dask_chunk_OCMs.py
CKehl Mar 30, 2020
536dabe
#776 - applied requested changes from the code review. Also, for debu…
CKehl Mar 30, 2020
a903692
Merge branch 'stabilize_dask_field_loading' of github.com:OceanParcel…
CKehl Mar 30, 2020
9ae93ae
#776 adapted the output message for tracking sameGrid function
CKehl Mar 30, 2020
321f82e
#776 - fixing PEP8 linter unconformities
CKehl Mar 30, 2020
9cb88a9
#776 - still not getting the expected shared-grid output. Modified th…
CKehl Mar 30, 2020
9e0eee8
#776 - patch for handling heterogeneous chunk sizes in shared-grid en…
CKehl Mar 31, 2020
09f440d
#776 - changed name of 'sameGrid' variable in FieldSet::from_netcdf()…
CKehl Mar 31, 2020
0491e05
#776 - moved the break-point for searching for the fitting shared gri…
CKehl Mar 31, 2020
0a5a3d1
#776 - fixes incorrectly caching dataFiles paths.
CKehl Mar 31, 2020
5929c88
#776 - fixes to include dimensions in the chunking that are in the fi…
CKehl Mar 31, 2020
3b608f2
#776 - fixing PEP8 linter errors.
CKehl Mar 31, 2020
3dfd6a7
#776 - adapted the NetCDFFileReader to re-format the chunksize if bei…
CKehl Mar 31, 2020
ab10cc4
#776 - adapted the NetCDFFileReader to re-format the chunksize if bei…
CKehl Mar 31, 2020
ab93e24
Adding unit test for sampling 2D fields
erikvansebille Apr 1, 2020
48f3b9b
Adding rectilinear field to 2D sampling test
erikvansebille Apr 1, 2020
3cb9870
Adding line back to set nav_lon to np.ones
erikvansebille Apr 1, 2020
03bedfc
#776 - minor tested and confirmed patches to make palaeo-NEMO case ru…
CKehl Apr 1, 2020
b582b56
#776 - fixing flake8 linter errors again
CKehl Apr 1, 2020
2b51135
Setting correct dtype for nav_lon.data
erikvansebille Apr 1, 2020
47c7fba
#776 - minor tested and confirmed patches to make palaeo-NEMO case ru…
CKehl Apr 1, 2020
f50bb03
Merge branch 'stabilize_dask_field_loading' of github.com:OceanParcel…
CKehl Apr 1, 2020
2cb3e9c
#776 last fixes to only apply chunking to file dimensions that are ac…
CKehl Apr 1, 2020
113fe17
#776 - disabled breaking example causing #782 - warning message to re…
CKehl Apr 1, 2020
46a16ad
#776 fix conversion from dask-internal nchunk conversion to the chunk…
CKehl Apr 1, 2020
f8b6e7c
fixed PEP8 and final adaptations so that chunks and addresses match t…
CKehl Apr 1, 2020
fed13b6
#776 - removed unnecessary comments and print instructions
CKehl Apr 1, 2020
a09df9a
Merge branch 'master' into stabilize_dask_field_loading
erikvansebille Apr 2, 2020
74c9143
updating test_3d_2dfield_sampling after merging #782
erikvansebille Apr 2, 2020
95b866d
#776 - added the possible merger of (chunked) grids via FieldSet.add_…
CKehl Apr 2, 2020
1e18c90
#776 - fixing PEP8 linter errors
CKehl Apr 2, 2020
cf21843
Merge branch 'master' into stabilize_dask_field_loading
erikvansebille Apr 3, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/ci-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ jobs:
./.github/scripts/linux_install.sh
./.github/scripts/osx_test.sh
shell: bash
timeout-minutes: 45
test_linux:
name: Testing on Linux
runs-on: ubuntu-latest
Expand All @@ -36,6 +37,7 @@ jobs:
python -m flake8 parcels/
python -m flake8 tests/
shell: bash
timeout-minutes: 45
test_windows:
name: Testing on Windows
runs-on: windows-2016
Expand All @@ -52,4 +54,5 @@ jobs:
./.github/scripts/windows_install.bat
./.github/scripts/windows_test.bat
shell: powershell
timeout-minutes: 45

415 changes: 415 additions & 0 deletions parcels/examples/example_dask_chunk_OCMs.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion parcels/examples/example_nemo_curvilinear.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def run_nemo_curvilinear(mode, outfile):
'data': data_path + 'V_purely_zonal-ORCA025_grid_V.nc4'}}
variables = {'U': 'U', 'V': 'V'}
dimensions = {'lon': 'glamf', 'lat': 'gphif'}
field_chunksize = {'lon': 2, 'lat': 2}
field_chunksize = {'y': 2, 'x': 2}
field_set = FieldSet.from_nemo(filenames, variables, dimensions, field_chunksize=field_chunksize)
assert field_set.U.field_chunksize == field_chunksize

Expand Down
178 changes: 133 additions & 45 deletions parcels/field.py

Large diffs are not rendered by default.

129 changes: 109 additions & 20 deletions parcels/fieldset.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions parcels/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def __init__(self, lon, lat, time, time_origin, mesh):
self.periods = 0
self.load_chunk = []
self.chunk_info = None
self.master_chunksize = None
self._add_last_periodic_data_timestep = False

@staticmethod
Expand Down
36 changes: 32 additions & 4 deletions parcels/gridset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import numpy as np
import functools
from parcels.tools.loggers import logger

__all__ = ['GridSet']

Expand All @@ -15,20 +17,46 @@ def add_grid(self, field):
grid = field.grid
existing_grid = False
for g in self.grids:
if field.field_chunksize != grid.master_chunksize:
logger.warning_once("Field chunksize and Grid master chunksize are not equal - erroneous behaviour expected.")
break
if g == grid:
existing_grid = True
break
sameGrid = True
sameDims = True
if grid.time_origin != g.time_origin:
sameDims = False
continue
for attr in ['lon', 'lat', 'depth', 'time']:
gattr = getattr(g, attr)
gridattr = getattr(grid, attr)
if gattr.shape != gridattr.shape or not np.allclose(gattr, gridattr):
sameGrid = False
sameDims = False
break
if not sameGrid:
if not sameDims:
continue
existing_grid = True
field.grid = g
break
sameGrid &= (grid.master_chunksize == g.master_chunksize) or (grid.master_chunksize in [False, None] and g.master_chunksize in [False, None])
if not sameGrid and sameDims and grid.master_chunksize is not None:
print(field.field_chunksize)
print(grid.master_chunksize)
print(g.master_chunksize)
res = False
if (isinstance(grid.master_chunksize, tuple) and isinstance(g.master_chunksize, tuple)) or \
(isinstance(grid.master_chunksize, dict) and isinstance(g.master_chunksize, dict)):
res |= functools.reduce(lambda i, j: i and j,
map(lambda m, k: m == k, grid.master_chunksize, g.master_chunksize), True)
if res:
sameGrid = True
logger.warning_once("Trying to initialize a shared grid with different chunking sizes - action prohibited. Replacing requested field_chunksize with grid's master chunksize.")
else:
raise ValueError("Conflict between grids of the same gridset: major grid chunksize and requested sibling-grid chunksize as well as their chunk-dimension names are not equal - Please apply the same chunksize to all fields in a shared grid!")
break
if sameGrid:
existing_grid = True
field.grid = g
break

if not existing_grid:
self.grids.append(grid)
Expand Down
2 changes: 1 addition & 1 deletion parcels/kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def execute_python(self, pset, endtime, dt):

# Don't execute particles that aren't started yet
sign_end_part = np.sign(endtime - particles.time)

# Compute min/max dt for first timestep
dt_pos = min(abs(particles.dt), abs(endtime - particles.time))

# ==== numerically stable; also making sure that continuously-recovered particles do end successfully,
Expand Down
6 changes: 3 additions & 3 deletions parcels/particle.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
from parcels.tools.error import ErrorCode
from parcels.tools.loggers import logger


__all__ = ['ScipyParticle', 'JITParticle', 'Variable']

indicators_64bit = [np.float64, np.int64, c_void_p]


class Variable(object):
"""Descriptor class that delegates data access to particle data
Expand Down Expand Up @@ -47,8 +48,7 @@ def __repr__(self):

def is64bit(self):
"""Check whether variable is 64-bit"""
return True if self.dtype == np.float64 or self.dtype == np.int64 \
or self.dtype == c_void_p else False
return True if self.dtype in indicators_64bit else False


class ParticleType(object):
Expand Down
9 changes: 6 additions & 3 deletions parcels/scripts/get_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,13 @@
"U_purely_zonal-ORCA025_grid_U.nc4", "V_purely_zonal-ORCA025_grid_V.nc4",
"mesh_mask.nc4"]]
+ ["NemoNorthSeaORCA025-N006_data/" + fn for fn in [
"ORCA025-N06_20000104d05U.nc", "ORCA025-N06_20000109d05U.nc",
"ORCA025-N06_20000104d05V.nc", "ORCA025-N06_20000109d05V.nc",
"ORCA025-N06_20000104d05W.nc", "ORCA025-N06_20000109d05W.nc",
"ORCA025-N06_20000104d05U.nc", "ORCA025-N06_20000109d05U.nc", "ORCA025-N06_20000114d05U.nc", "ORCA025-N06_20000119d05U.nc", "ORCA025-N06_20000124d05U.nc", "ORCA025-N06_20000129d05U.nc",
"ORCA025-N06_20000104d05V.nc", "ORCA025-N06_20000109d05V.nc", "ORCA025-N06_20000114d05V.nc", "ORCA025-N06_20000119d05V.nc", "ORCA025-N06_20000124d05V.nc", "ORCA025-N06_20000129d05V.nc",
"ORCA025-N06_20000104d05W.nc", "ORCA025-N06_20000109d05W.nc", "ORCA025-N06_20000114d05W.nc", "ORCA025-N06_20000119d05W.nc", "ORCA025-N06_20000124d05W.nc", "ORCA025-N06_20000129d05W.nc",
"coordinates.nc"]]
+ ["POPSouthernOcean_data/" + fn for fn in ["t.x1_SAMOC_flux.169000.nc", "t.x1_SAMOC_flux.169001.nc",
"t.x1_SAMOC_flux.169002.nc", "t.x1_SAMOC_flux.169003.nc",
"t.x1_SAMOC_flux.169004.nc", "t.x1_SAMOC_flux.169005.nc"]]
+ ["WOA_data/" + fn for fn in ["woa18_decav_t%.2d_04.nc" % m
for m in range(1, 13)]])

Expand Down
92 changes: 91 additions & 1 deletion tests/test_fieldset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from parcels.field import Field, VectorField
from parcels.tools.converters import TimeConverter, _get_cftime_calendars, _get_cftime_datetimes, UnitConverter, GeographicPolar
import dask.array as da
import dask
from datetime import timedelta as delta
import datetime
import numpy as np
Expand Down Expand Up @@ -239,6 +240,90 @@ def test_add_duplicate_field(dupobject):
assert error_thrown


def test_fieldset_samegrids_from_file(tmpdir, filename='test_subsets'):
    """Check that fields read from the same NetCDF files share one grid and chunksize."""
    # Write a small fieldset to disk so it can be re-read through from_netcdf
    data, dimensions = generate_fieldset(100, 100)
    filepath1 = tmpdir.join(filename+'_1')
    fieldset1 = FieldSet.from_data(data, dimensions)
    fieldset1.write(filepath1)

    # U and V point at the *same* files, repeated over 4 synthetic daily timestamps
    ufiles = [filepath1+'U.nc', ] * 4
    vfiles = [filepath1+'V.nc', ] * 4
    timestamps = np.arange(0, 4, 1) * 86400.0
    timestamps = np.expand_dims(timestamps, 1)
    files = {'U': ufiles, 'V': vfiles}
    variables = {'U': 'vozocrtx', 'V': 'vomecrty'}
    dimensions = {'lon': 'nav_lon', 'lat': 'nav_lat'}
    chs = 'auto'
    fieldset = FieldSet.from_netcdf(files, variables, dimensions, timestamps=timestamps, allow_time_extrapolation=True, field_chunksize=chs)

    # Identical source files should be deduplicated to a single shared grid,
    # with matching master chunksize on the grid and field_chunksize on the fields
    assert fieldset.gridset.size == 1
    assert fieldset.U.grid == fieldset.V.grid
    assert fieldset.U.grid.master_chunksize == fieldset.V.grid.master_chunksize
    assert fieldset.U.field_chunksize == fieldset.V.field_chunksize


def test_fieldset_diffgrids_from_file(tmpdir, filename='test_subsets'):
    """Check that fields read from files with different resolutions get separate grids."""
    # Write two fieldsets of different resolution (100x100 vs 50x50) to disk
    data, dimensions = generate_fieldset(100, 100)
    filepath1 = tmpdir.join(filename+'_1')
    fieldset1 = FieldSet.from_data(data, dimensions)
    fieldset1.write(filepath1)
    data, dimensions = generate_fieldset(50, 50)
    filepath2 = tmpdir.join(filename + '_2')
    fieldset2 = FieldSet.from_data(data, dimensions)
    fieldset2.write(filepath2)

    # U comes from the high-resolution files, V from the low-resolution ones
    ufiles = [filepath1+'U.nc', ] * 4
    vfiles = [filepath2+'V.nc', ] * 4
    timestamps = np.arange(0, 4, 1) * 86400.0
    timestamps = np.expand_dims(timestamps, 1)
    files = {'U': ufiles, 'V': vfiles}
    variables = {'U': 'vozocrtx', 'V': 'vomecrty'}
    dimensions = {'lon': 'nav_lon', 'lat': 'nav_lat'}
    chs = 'auto'

    # Differing source grids must NOT be merged into one shared grid
    fieldset = FieldSet.from_netcdf(files, variables, dimensions, timestamps=timestamps, allow_time_extrapolation=True, field_chunksize=chs)
    assert fieldset.gridset.size == 2
    assert fieldset.U.grid != fieldset.V.grid


def test_fieldset_diffgrids_from_file_data(tmpdir, filename='test_subsets'):
    """Check that a field added from in-memory data keeps a grid separate from file-based fields."""
    # Build an in-memory fieldset, write it to disk, and keep one Field ('B') around
    data, dimensions = generate_fieldset(100, 100)
    filepath = tmpdir.join(filename)
    fieldset_data = FieldSet.from_data(data, dimensions)
    fieldset_data.write(filepath)
    field_data = fieldset_data.U
    field_data.name = "B"

    # Re-read U and V from the written files over 4 synthetic daily timestamps
    ufiles = [filepath+'U.nc', ] * 4
    vfiles = [filepath+'V.nc', ] * 4
    timestamps = np.arange(0, 4, 1) * 86400.0
    timestamps = np.expand_dims(timestamps, 1)
    files = {'U': ufiles, 'V': vfiles}
    variables = {'U': 'vozocrtx', 'V': 'vomecrty'}
    dimensions = {'lon': 'nav_lon', 'lat': 'nav_lat'}
    chs = 'auto'
    fieldset_file = FieldSet.from_netcdf(files, variables, dimensions, timestamps=timestamps, allow_time_extrapolation=True, field_chunksize=chs)

    # The data-backed field 'B' uses a different grid object than the file-backed fields
    fieldset_file.add_field(field_data, "B")
    assert len(fieldset_file.get_fields()) == 3
    assert fieldset_file.gridset.size == 2
    assert fieldset_file.U.grid != fieldset_file.B.grid


def test_fieldset_samegrids_from_data(tmpdir, filename='test_subsets'):
    """Check that adding a field built from the same in-memory data reuses the existing grid."""
    data, dimensions = generate_fieldset(100, 100)
    fieldset1 = FieldSet.from_data(data, dimensions)
    # 'B' is literally fieldset1.U under another name, so its grid must be shared
    field_data = fieldset1.U
    field_data.name = "B"
    fieldset1.add_field(field_data, "B")
    assert fieldset1.gridset.size == 1
    assert fieldset1.U.grid == fieldset1.B.grid


@pytest.mark.parametrize('mesh', ['flat', 'spherical'])
def test_fieldset_celledgesizes(mesh):
data, dimensions = generate_fieldset(10, 7)
Expand Down Expand Up @@ -354,6 +439,11 @@ def test_vector_fields(mode, swapUV):
@pytest.mark.parametrize('field_chunksize', [False, 'auto', (1, 32, 32)])
@pytest.mark.parametrize('with_GC', [False, True])
def test_from_netcdf_memory_containment(mode, time_periodic, field_chunksize, with_GC):
if field_chunksize == 'auto':
dask.config.set({'array.chunk-size': '2MiB'})
else:
dask.config.set({'array.chunk-size': '128MiB'})

class PerformanceLog():
samples = []
memory_steps = []
Expand Down Expand Up @@ -432,7 +522,7 @@ def test_from_netcdf_field_chunking(mode, time_periodic, field_chunksize, deferL


@pytest.mark.parametrize('datetype', ['float', 'datetime64'])
def test_timestaps(datetype, tmpdir):
def test_timestamps(datetype, tmpdir):
data1, dims1 = generate_fieldset(10, 10, 1, 10)
data2, dims2 = generate_fieldset(10, 10, 1, 4)
if datetype == 'float':
Expand Down