diff --git a/parcels/__init__.py b/parcels/__init__.py
index 2b55c9b35d..509ebae1eb 100644
--- a/parcels/__init__.py
+++ b/parcels/__init__.py
@@ -4,8 +4,10 @@
from parcels.fieldset import * # noqa
from parcels.particle import * # noqa
from parcels.particleset import * # noqa
+# from parcels.particleset_benchmark import * # noqa
from parcels.field import * # noqa
from parcels.kernel import * # noqa
+# from parcels.kernel_benchmark import * # noqa
import parcels.rng as ParcelsRandom # noqa
from parcels.particlefile import * # noqa
from parcels.kernels import * # noqa
diff --git a/parcels/examples/tutorial_sampling.py b/parcels/examples/tutorial_sampling.py
new file mode 100644
index 0000000000..8c99a235cd
--- /dev/null
+++ b/parcels/examples/tutorial_sampling.py
@@ -0,0 +1,252 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# # Field sampling tutorial
+
+# The particle trajectories allow us to study fields like temperature, plastic concentration or chlorophyll from a Lagrangian perspective.
+#
+# In this tutorial we will go through how particles can sample `Fields`, using temperature as an example. Along the way we will get to know the parcels class `Variable` (see [here](https://oceanparcels.org/gh-pages/html/#parcels.particle.Variable) for the documentation) and some of its methods. This tutorial covers several applications of a sampling setup:
+# * [**Basic along trajectory sampling**](#Basic-sampling)
+# * [**Sampling initial conditions**](#Sampling-initial-values)
+# * [**Sampling initial and along-trajectory values with repeated release**](#Sampling-with-repeatdt)
+
+# ### Basic sampling
+# We import the `Variable` class as well as the standard modules needed to set up a simulation.
+
+# In[1]:
+
+
+# Modules needed for the Parcels simulation
+from parcels import Variable, FieldSet, ParticleSet, JITParticle, AdvectionRK4
+import numpy as np
+from datetime import timedelta as delta
+
+# To open and look at the temperature data
+import xarray as xr
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+
+
+# Suppose we want to study the environmental temperature for plankton drifting around a peninsula. We have a dataset with surface ocean velocities and the corresponding sea surface temperature stored in netcdf files in the folder `"Peninsula_data"`. Besides the velocity fields, we load the temperature field using `extra_fields={'T': 'T'}`. The particles are released on the left hand side of the domain.
+
+# In[2]:
+
+
+# Velocity and temperature fields
+fieldset = FieldSet.from_parcels("Peninsula_data/peninsula", extra_fields={'T': 'T'}, allow_time_extrapolation=True)
+
+# Particle locations and initial time
+npart = 10 # number of particles to be released
+lon = 3e3 * np.ones(npart)
+lat = np.linspace(3e3, 45e3, npart, dtype=np.float32)
+time = np.arange(0, npart) * delta(hours=2).total_seconds() # release each particle two hours after the previous one
+
+# Plot temperature field and initial particle locations
+T_data = xr.open_dataset("Peninsula_data/peninsulaT.nc")
+plt.figure()
+ax = plt.axes()
+T_contour = ax.contourf(T_data.x.values, T_data.y.values, T_data.T.values[0,0], cmap=plt.cm.inferno)
+ax.scatter(lon, lat, c='w')
+plt.colorbar(T_contour, label=r'T [$^{\circ} C$]')
+plt.show()
+
+
+# To sample the temperature field, we need to create a new class of particles where temperature is a `Variable`. As an argument for the `Variable` class, we need to provide the initial values for the particles. The easiest option is to access `fieldset.T`, but this option has some drawbacks.
+
+# In[3]:
+
+
+class SampleParticle(JITParticle): # Define a new particle class
+ temperature = Variable('temperature', initial=fieldset.T) # Variable 'temperature' initialised by sampling the temperature
+
+pset = ParticleSet(fieldset=fieldset, pclass=SampleParticle, lon=lon, lat=lat, time=time)
+
+
+# Using `fieldset.T` leads to the `WARNING` displayed above, because the `Variable` is initialised by sampling the field in the slower SciPy mode. Another problem occurs when using the `repeatdt` argument instead of `time`:
+
+# <a id='repeatdt_error'></a>
+
+# In[4]:
+
+
+repeatdt = delta(hours=3)
+
+try:
+ pset = ParticleSet(fieldset=fieldset, pclass=SampleParticle, lon=lon, lat=lat, repeatdt=repeatdt)
+except (RuntimeError, ValueError) as e:
+ print(e)
+
+
+# Since the initial time is not defined, the `Variable` class does not know at what time to access the temperature field.
+
+# The solution to this initialisation problem is to leave the initial value zero and sample the initial condition in JIT mode with the sampling Kernel:
+
+# In[5]:
+
+
+class SampleParticleInitZero(JITParticle): # Define a new particle class
+ temperature = Variable('temperature', initial=0) # Variable 'temperature' initially zero
+
+pset = ParticleSet(fieldset=fieldset, pclass=SampleParticleInitZero, lon=lon, lat=lat, time=time)
+
+def SampleT(particle, fieldset, time):
+ particle.temperature = fieldset.T[time, particle.depth, particle.lat, particle.lon]
+sample_kernel = pset.Kernel(SampleT) # Casting the SampleT function to a kernel.
+
+
+# To sample the initial values, we can execute the sampling kernel once over the entire particleset with `dt=0`, so that time does not increase:
+
+# In[6]:
+
+
+pset.execute(sample_kernel, dt=0) # by only executing the sample kernel we record the initial temperature of the particles
+
+output_file = pset.ParticleFile(name="InitZero.nc", outputdt=delta(hours=1))
+pset.execute(AdvectionRK4 + sample_kernel, runtime=delta(hours=30), dt=delta(minutes=5),
+ output_file=output_file)
+output_file.export() # export the trajectory data to a netcdf file
+output_file.close()
+
+
+# The particle dataset now contains the particle trajectories and the corresponding environmental temperature
+
+# In[7]:
+
+
+Particle_data = xr.open_dataset("InitZero.nc")
+
+plt.figure()
+ax = plt.axes()
+ax.set_ylabel('Y')
+ax.set_xlabel('X')
+ax.set_ylim(1000, 49000)
+ax.set_xlim(1000, 99000)
+ax.plot(Particle_data.lon.transpose(), Particle_data.lat.transpose(), c='k', zorder=1)
+T_scatter = ax.scatter(Particle_data.lon, Particle_data.lat, c=Particle_data.temperature,
+ cmap=plt.cm.inferno, norm=mpl.colors.Normalize(vmin=0., vmax=20.),
+ edgecolor='k', zorder=2)
+plt.colorbar(T_scatter, label=r'T [$^{\circ} C$]')
+plt.show()
+
+
+# ### Sampling initial values
+
+# In some simulations only the particles' initial value within the field is of interest: the variable does not need to be known along the entire trajectory. To reduce the amount of data written, we can specify the `to_write` argument of the temperature `Variable`. This argument can take three values: `True`, `False` or `'once'`, and determines whether and how often the `Variable` is written to the output file. If we only want to know the initial value, we can set it to `'once'`, so that only the first value is written to the output file.
+
+# In[8]:
+
+
+class SampleParticleOnce(JITParticle): # Define a new particle class
+ temperature = Variable('temperature', initial=0, to_write='once') # Variable 'temperature'
+
+pset = ParticleSet(fieldset=fieldset, pclass=SampleParticleOnce, lon=lon, lat=lat, time=time)
+
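+# As an aside (purely illustrative, not used further in this tutorial), a variable that should be
+# sampled during the run but never written to the output file can be declared with `to_write=False`:
+
+class SampleParticleNoWrite(JITParticle):  # illustrative particle class
+    temperature = Variable('temperature', initial=0, to_write=False)  # sampled but never written
+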
+
+# In[9]:
+
+
+pset.execute(sample_kernel, dt=0) # by only executing the sample kernel we record the initial temperature of the particles
+
+output_file = pset.ParticleFile(name="WriteOnce.nc", outputdt=delta(hours=1))
+pset.execute(AdvectionRK4, runtime=delta(hours=24), dt=delta(minutes=5),
+ output_file=output_file)
+output_file.close()
+
+
+# Since all the particles are released at the same x-position and the temperature field is invariant in the y-direction, all particles have an initial temperature of 0.4$^\circ$C
+
+# In[10]:
+
+
+Particle_data = xr.open_dataset("WriteOnce.nc")
+
+plt.figure()
+ax = plt.axes()
+ax.set_ylabel('Y')
+ax.set_xlabel('X')
+ax.set_ylim(1000, 49000)
+ax.set_xlim(1000, 99000)
+ax.plot(Particle_data.lon.transpose(), Particle_data.lat.transpose(), c='k', zorder=1)
+T_scatter = ax.scatter(Particle_data.lon, Particle_data.lat,
+ c=np.tile(Particle_data.temperature, (Particle_data.lon.shape[1], 1)).T,
+ cmap=plt.cm.inferno, norm=mpl.colors.Normalize(vmin=0., vmax=1.),
+ edgecolor='k', zorder=2)
+plt.colorbar(T_scatter, label=r'Initial T [$^{\circ} C$]')
+plt.show()
+
+
+# ### Sampling with repeatdt
+
+# Some experiments require large sets of particles to be released repeatedly at the same locations. The [`particleset`](https://oceanparcels.org/gh-pages/html/#module-parcels.particleset) object has the `repeatdt` option for this, but when you want to sample the initial values, this introduces some problems, as we have seen [here](#repeatdt_error). For more advanced control over the repeated release of particles, you can manually write a for-loop using the method `particleset.add()`. Note that this for-loop is very similar to the one that `repeatdt` would execute under the hood in `particleset.execute()`.
+#
+# Adding particles to the `particleset` during the simulation reduces the memory used, compared to specifying all delayed particle release times upfront, which improves the computational speed. In the loop, we want to initialise new particles and sample their initial temperature. If we want to write both the newly initialised particles with their sampled temperature and the older particles that have already been advected, we have to make sure both sets of particles are at the same moment in time. The initial conditions must be written to the output file before the particles are advected, because `particle.time` increases during advection.
+#
+# We do not specify the `outputdt` argument for the `output_file`; instead we write the data with `output_file.write(pset, time)` on each iteration. A new particleset is initialised whenever `time` is a multiple of `repeatdt`. Because the particles are advected after being written, their last positions must be written once more after the loop.
+
+# In[11]:
+
+
+outputdt = delta(hours=1).total_seconds() # write the particle data every hour
+repeatdt = delta(hours=6).total_seconds() # release a new set of particles every six hours
+runtime = delta(hours=24).total_seconds()
+
+pset = ParticleSet(fieldset=fieldset, pclass=SampleParticleInitZero, lon=[], lat=[], time=[]) # Using SampleParticleInitZero
+kernels = AdvectionRK4 + sample_kernel
+
+output_file = pset.ParticleFile(name="RepeatLoop.nc") # Do not specify the outputdt yet, so we can manually write the output
+
+for time in np.arange(0, runtime, outputdt):
+ if time % repeatdt == 0: # time is a multiple of repeatdt
+ pset_init = ParticleSet(fieldset=fieldset, pclass=SampleParticleInitZero, lon=lon, lat=lat, time=time)
+ pset_init.execute(sample_kernel, dt=0) # record the initial temperature of the particles
+ pset.add(pset_init) # add the newly released particles to the total particleset
+
+    output_file.write(pset, time) # write the initialised particles and the advected particles
+
+ pset.execute(kernels, runtime=outputdt, dt=delta(minutes=5))
+ print('Length of pset at time %d: %d' % (time, len(pset)))
+
+output_file.write(pset, time+outputdt)
+
+output_file.close()
+
+
+# Every six hours (each `repeatdt` interval), ten new particles have been added to the particleset.
+
+# In[12]:
+
+
+Particle_data = xr.open_dataset("RepeatLoop.nc")
+print(Particle_data.time[:,0].values / np.timedelta64(1, 'h')) # The initial hour at which each particle is released
+assert np.allclose(Particle_data.time[:,0].values / np.timedelta64(1, 'h'), [int(k/10)*6 for k in range(40)])
+
+
+# Let's check if the initial temperatures were sampled correctly for all particles
+
+# In[13]:
+
+
+print(Particle_data.temperature[:,0].values)
+assert np.allclose(Particle_data.temperature[:,0].values, Particle_data.temperature[:,0].values[0])
+
+
+# And see if the sampling of the temperature field is done correctly along the trajectories
+
+# In[14]:
+
+
+Release0 = Particle_data.where(Particle_data.time[:,0]==np.timedelta64(0, 's')) # the particles released at t = 0
+
+plt.figure()
+ax = plt.axes()
+ax.set_ylabel('Y')
+ax.set_xlabel('X')
+ax.set_ylim(1000, 49000)
+ax.set_xlim(1000, 99000)
+ax.plot(Release0.lon.transpose(), Release0.lat.transpose(), c='k', zorder=1)
+T_scatter = ax.scatter(Release0.lon, Release0.lat, c=Release0.temperature,
+ cmap=plt.cm.inferno, norm=mpl.colors.Normalize(vmin=0., vmax=20.),
+ edgecolor='k', zorder=2)
+plt.colorbar(T_scatter, label=r'T [$^{\circ} C$]')
+plt.show()
+
diff --git a/parcels/field.py b/parcels/field.py
index 1ea1071630..5d737baa28 100644
--- a/parcels/field.py
+++ b/parcels/field.py
@@ -1144,25 +1144,29 @@ def chunk_data(self):
# 1: was asked to load by kernel in JIT
# 2: is loaded and was touched last C call
# 3: is loaded
- if isinstance(self.data, da.core.Array):
- for block_id in range(len(self.grid.load_chunk)):
- if self.grid.load_chunk[block_id] == 1 or self.grid.load_chunk[block_id] > 1 and self.data_chunks[block_id] is None:
- block = self.get_block(block_id)
- self.data_chunks[block_id] = np.array(self.data.blocks[(slice(self.grid.tdim),) + block])
- elif self.grid.load_chunk[block_id] == 0:
- if isinstance(self.data_chunks, list):
- self.data_chunks[block_id] = None
- else:
- self.data_chunks[block_id, :] = None
- self.c_data_chunks[block_id] = None
- else:
- if isinstance(self.data_chunks, list):
- self.data_chunks[0] = None
+ try:
+ if isinstance(self.data, da.core.Array):
+ for block_id in range(len(self.grid.load_chunk)):
+ if self.grid.load_chunk[block_id] == 1 or self.grid.load_chunk[block_id] > 1 and self.data_chunks[block_id] is None:
+ block = self.get_block(block_id)
+ self.data_chunks[block_id] = np.array(self.data.blocks[(slice(self.grid.tdim),) + block])
+ elif self.grid.load_chunk[block_id] == 0:
+ if isinstance(self.data_chunks, list):
+ self.data_chunks[block_id] = None
+ else:
+ self.data_chunks[block_id, :] = None
+ self.c_data_chunks[block_id] = None
else:
- self.data_chunks[0, :] = None
- self.c_data_chunks[0] = None
- self.grid.load_chunk[0] = 2
- self.data_chunks[0] = np.array(self.data)
+ if isinstance(self.data_chunks, list):
+ self.data_chunks[0] = None
+ else:
+ self.data_chunks[0, :] = None
+ self.c_data_chunks[0] = None
+ self.grid.load_chunk[0] = 2
+ self.data_chunks[0] = np.array(self.data)
+ except (IndexError, IOError) as error:
+ logger.error("\nField '{}' - error: {}; field chunks: type = {} length = {}; grid chunks: length = {}\n".format(self.name, error, type(self.data_chunks), len(self.data_chunks), len(self.grid.load_chunk)))
+ raise error
@property
def ctypes_struct(self):
diff --git a/parcels/fieldfilebuffer.py b/parcels/fieldfilebuffer.py
index b17dca59c3..cf7820ea90 100644
--- a/parcels/fieldfilebuffer.py
+++ b/parcels/fieldfilebuffer.py
@@ -549,7 +549,11 @@ def data_access(self):
data = self.dataset[self.name]
ti = range(data.shape[0]) if self.ti is None else self.ti
- data = self._apply_indices(data, ti)
+ try:
+ data = self._apply_indices(data, ti)
+ except (IndexError,) as error_msg:
+ logger.error("\nError applying indices {} for field '{}'.\n".format(self.indices, self.name))
+ raise error_msg
if isinstance(data, xr.DataArray):
data = data.data
diff --git a/parcels/fieldset.py b/parcels/fieldset.py
index a5e9a648c2..9ac1ec556a 100644
--- a/parcels/fieldset.py
+++ b/parcels/fieldset.py
@@ -374,7 +374,10 @@ def from_netcdf(cls, filenames, variables, dimensions, indices=None, fieldtype=N
possibly_samegrid = True
if procchunk != varchunksize:
for dim in varchunksize:
- if varchunksize[dim][1] != procchunk[dim][1]:
+ if dim not in procchunk:
+ possibly_samegrid &= False
+ # elif varchunksize[dim][0] != procchunk[dim][0] or varchunksize[dim][1] != procchunk[dim][1]:
+ elif varchunksize[dim][1] != procchunk[dim][1]:
possibly_samegrid &= False
if not possibly_samegrid:
break
diff --git a/parcels/gridset.py b/parcels/gridset.py
index ccc5d173ed..61a6db9bbf 100644
--- a/parcels/gridset.py
+++ b/parcels/gridset.py
@@ -32,7 +32,7 @@ def add_grid(self, field):
if (g.chunksize != grid.chunksize) and (grid.chunksize not in [False, None]):
for dim in grid.chunksize:
- if grid.chunksize[dim][1] != g.chunksize[dim][1]:
+ if dim not in g.chunksize.keys() or grid.chunksize[dim][1] != g.chunksize[dim][1]:
sameGrid &= False
break
diff --git a/parcels/kernel.py b/parcels/kernel.py
index 6bdcec08dc..4f5e422b9c 100644
--- a/parcels/kernel.py
+++ b/parcels/kernel.py
@@ -182,6 +182,9 @@ def __init__(self, fieldset, ptype, pyfunc=None, funcname=None,
self.lib_file = "%s.%s" % (basename, 'dll' if platform == 'win32' else 'so')
self.log_file = "%s.log" % basename
+ def __del__(self):
+ pass
+
@property
def _cache_key(self):
field_keys = "-".join(["%s:%s" % (name, field.units.__class__.__name__)
@@ -373,6 +376,13 @@ def execute_python(self, pset, endtime, dt):
dt_pos = 0
break
+ def remove_deleted(self, pset, output_file, endtime):
+ """Utility to remove all particles that signalled deletion"""
+ indices = pset.particle_data['state'] == OperationCode.Delete
+ if np.count_nonzero(indices) > 0 and output_file is not None:
+ output_file.write(pset, endtime, deleted_only=indices)
+ pset.remove_booleanvector(indices)
+
def execute(self, pset, endtime, dt, recovery=None, output_file=None, execute_once=False):
"""Execute this Kernel over a ParticleSet for several timesteps"""
particles = pset.data_accessor()
@@ -383,13 +393,6 @@ def execute(self, pset, endtime, dt, recovery=None, output_file=None, execute_on
if abs(dt) < 1e-6 and not execute_once:
logger.warning_once("'dt' is too small, causing numerical accuracy limit problems. Please chose a higher 'dt' and rather scale the 'time' axis of the field accordingly. (related issue #762)")
- def remove_deleted(pset):
- """Utility to remove all particles that signalled deletion"""
- indices = pset.particle_data['state'] == OperationCode.Delete
- if np.count_nonzero(indices) > 0 and output_file is not None:
- output_file.write(pset, endtime, deleted_only=indices)
- pset.remove_booleanvector(indices)
-
if recovery is None:
recovery = {}
elif ErrorCode.ErrorOutOfBounds in recovery and ErrorCode.ErrorThroughSurface not in recovery:
@@ -409,10 +412,11 @@ def remove_deleted(pset):
self.execute_python(pset, endtime, dt)
# Remove all particles that signalled deletion
- remove_deleted(pset)
+ self.remove_deleted(pset, output_file=output_file, endtime=endtime)
# Identify particles that threw errors
error_particles = np.isin(pset.particle_data['state'], [StateCode.Success, StateCode.Evaluate], invert=True)
+
while np.any(error_particles):
# Apply recovery kernel
for p in np.where(error_particles)[0]:
@@ -421,6 +425,8 @@ def remove_deleted(pset):
return
if particles.state == OperationCode.Repeat:
particles.set_state(StateCode.Evaluate)
+ elif particles.state == OperationCode.Delete:
+ pass
elif particles.state in recovery_map:
recovery_kernel = recovery_map[particles.state]
particles.set_state(StateCode.Success)
@@ -428,11 +434,12 @@ def remove_deleted(pset):
if particles.state == StateCode.Success:
particles.set_state(StateCode.Evaluate)
else:
- logger.warning_once('Deleting particle because of bug in #749 and #737')
+ logger.warning_once('Deleting particle {} because of non-recoverable error'.format(particles.id))
+ # logger.warning('Deleting particle because of bug in #749 and #737 - particle state: {}'.format(ErrorCode.toString(particles.state)))
particles.delete()
# Remove all particles that signalled deletion
- remove_deleted(pset)
+ self.remove_deleted(pset, output_file=output_file, endtime=endtime)
# Execute core loop again to continue interrupted particles
if self.ptype.uses_jit:
diff --git a/parcels/kernel_benchmark.py b/parcels/kernel_benchmark.py
new file mode 100644
index 0000000000..f756a808bc
--- /dev/null
+++ b/parcels/kernel_benchmark.py
@@ -0,0 +1,296 @@
+# import _ctypes
+# import inspect
+# import math # noqa
+# import random # noqa
+# import re
+# import time
+from ast import FunctionDef
+# from ast import parse
+# from copy import deepcopy
+from ctypes import byref
+from ctypes import c_double
+from ctypes import c_int
+# from ctypes import c_void_p
+# from hashlib import md5
+# from os import path
+# from os import remove
+# from sys import platform
+# from sys import version_info
+
+import numpy as np
+# import numpy.ctypeslib as npct
+try:
+ from mpi4py import MPI
+except ImportError:
+ MPI = None
+
+# from parcels.field import Field
+from parcels.field import FieldOutOfBoundError
+from parcels.field import FieldOutOfBoundSurfaceError
+from parcels.field import TimeExtrapolationError
+from parcels.field import NestedField
+from parcels.field import SummedField
+from parcels.field import VectorField
+from parcels.tools.statuscodes import StateCode, OperationCode, ErrorCode
+from parcels.tools.loggers import logger
+
+from parcels.kernel import Kernel
+from parcels.tools.performance_logger import TimingLog
+
+__all__ = ['Kernel_Benchmark']
+
+
+class Kernel_Benchmark(Kernel):
+ """Kernel object that encapsulates auto-generated code.
+
+ :arg fieldset: FieldSet object providing the field information
+ :arg ptype: PType object for the kernel particle
+ :param delete_cfiles: Boolean whether to delete the C-files after compilation in JIT mode (default is True)
+
+ Note: A Kernel is either created from a compiled object
+ or the necessary information (funcname, funccode, funcvars) is provided.
+ The py_ast argument may be derived from the code string, but for
+ concatenation, the merged AST plus the new header definition is required.
+ """
+
+ def __init__(self, fieldset, ptype, pyfunc=None, funcname=None,
+ funccode=None, py_ast=None, funcvars=None, c_include="", delete_cfiles=True):
+ super(Kernel_Benchmark, self).__init__(fieldset, ptype, pyfunc, funcname, funccode, py_ast, funcvars, c_include, delete_cfiles)
+ self._compute_timings = TimingLog()
+ self._io_timings = TimingLog()
+ self._mem_io_timings = TimingLog()
+
+ @property
+ def io_timings(self):
+ return self._io_timings
+
+ @property
+ def mem_io_timings(self):
+ return self._mem_io_timings
+
+ @property
+ def compute_timings(self):
+ return self._compute_timings
+
+ def __del__(self):
+ super(Kernel_Benchmark, self).__del__()
+
+ def execute_jit(self, pset, endtime, dt):
+ """Invokes JIT engine to perform the core update loop"""
+ if len(pset) > 0 and pset.particle_data['xi'].ndim == 2 and pset.fieldset is not None:
+ assert pset.fieldset.gridset.size == pset.particle_data['xi'].shape[1], \
+ 'FieldSet has different number of grids than Particle.xi. Have you added Fields after creating the ParticleSet?'
+
+ if pset.fieldset is not None:
+ self._io_timings.start_timing()
+ for g in pset.fieldset.gridset.grids:
+                g.cstruct = None # force the C-side grid pointers to be rebuilt
+
+ # Make a copy of the transposed array to enforce
+ # C-contiguous memory layout for JIT mode.
+ for f in pset.fieldset.get_fields():
+ if type(f) in [VectorField, NestedField, SummedField]:
+ continue
+ if f in self.field_args.values():
+ f.chunk_data()
+ else:
+ for block_id in range(len(f.data_chunks)):
+ f.data_chunks[block_id] = None
+ f.c_data_chunks[block_id] = None
+ self._io_timings.stop_timing()
+ self._io_timings.accumulate_timing()
+
+ self._mem_io_timings.start_timing()
+ for g in pset.fieldset.gridset.grids:
+ g.load_chunk = np.where(g.load_chunk == 1, 2, g.load_chunk)
+            if len(g.load_chunk) > 0: # not the case if a field is not called in the kernel
+ if not g.load_chunk.flags.c_contiguous:
+ g.load_chunk = g.load_chunk.copy()
+ if not g.depth.flags.c_contiguous:
+ g.depth = g.depth.copy()
+ if not g.lon.flags.c_contiguous:
+ g.lon = g.lon.copy()
+ if not g.lat.flags.c_contiguous:
+ g.lat = g.lat.copy()
+ self._mem_io_timings.stop_timing()
+ self._mem_io_timings.accumulate_timing()
+
+ self._compute_timings.start_timing()
+ fargs = []
+ if pset.fieldset is not None:
+ fargs += [byref(f.ctypes_struct) for f in self.field_args.values()]
+ fargs += [c_double(f) for f in self.const_args.values()]
+ # particle_data = pset._particle_data.ctypes.data_as(c_void_p)
+ particle_data = byref(pset.ctypes_struct)
+ result = self._function(c_int(len(pset)), particle_data,
+ c_double(endtime),
+ c_double(dt),
+ *fargs)
+ self._compute_timings.stop_timing()
+ self._compute_timings.accumulate_timing()
+
+ self._io_timings.advance_iteration()
+ self._mem_io_timings.advance_iteration()
+ self._compute_timings.advance_iteration()
+ return result
+
+ def execute_python(self, pset, endtime, dt):
+ """Performs the core update loop via Python"""
+ sign_dt = np.sign(dt)
+
+ if 'AdvectionAnalytical' in self.pyfunc.__name__:
+ analytical = True
+ if not np.isinf(dt):
+ logger.warning_once('dt is not used in AnalyticalAdvection, so is set to np.inf')
+ dt = np.inf
+ else:
+ analytical = False
+
+ particles = pset.data_accessor()
+
+ # back up variables in case of ErrorCode.Repeat
+ p_var_back = {}
+
+ for f in self.fieldset.get_fields():
+ if type(f) in [VectorField, NestedField, SummedField]:
+ continue
+
+ self._io_timings.start_timing()
+ loaded_data = f.data
+ self._io_timings.stop_timing()
+ self._io_timings.accumulate_timing()
+ self._mem_io_timings.start_timing()
+ f.data = np.array(loaded_data)
+ self._mem_io_timings.stop_timing()
+ self._mem_io_timings.accumulate_timing()
+
+ self._compute_timings.start_timing()
+ # for p in pset.particles:
+ for p in range(pset.size):
+ particles.set_index(p)
+
+ ptype = pset.ptype
+ # Don't execute particles that aren't started yet
+ # sign_end_part = np.sign(endtime - p.time)
+ sign_end_part = np.sign(endtime - particles.time)
+
+ # Compute min/max dt for first timestep
+ # dt_pos = min(abs(p.dt), abs(endtime - p.time))
+ dt_pos = min(abs(particles.dt), abs(endtime - particles.time))
+
+            # ==== numerically stable check; also makes sure that continuously-recovered particles end successfully,
+            # as they fulfil this condition when entering the final calculation. ==== #
+ if ((sign_end_part != sign_dt) or np.isclose(dt_pos, 0)) and not np.isclose(dt, 0):
+ if abs(particles.time) >= abs(endtime):
+ particles.set_state(StateCode.Success)
+ # if abs(p.time) >= abs(endtime):
+ # p.state = ErrorCode.Success
+ continue
+
+ # while p.state in [ErrorCode.Evaluate, ErrorCode.Repeat] or np.isclose(dt, 0):
+ while particles.state in [StateCode.Evaluate, OperationCode.Repeat] or np.isclose(dt, 0):
+
+ for var in ptype.variables:
+ # p_var_back[var.name] = getattr(p, var.name)
+ p_var_back[var.name] = getattr(particles, var.name)
+ try:
+ pdt_prekernels = sign_dt * dt_pos
+                    particles.dt = pdt_prekernels
+                    state_prev = particles.state
+ # res = self.pyfunc(p, pset.fieldset, p.time)
+ res = self.pyfunc(particles, pset.fieldset, particles.time)
+ if res is None:
+ res = StateCode.Success
+
+ # if res is StateCode.Success and p.state != state_prev:
+ # res = p.state
+ if res is StateCode.Success and particles.state != state_prev:
+ res = particles.state
+
+ # if res == ErrorCode.Success and not np.isclose(p.dt, pdt_prekernels):
+ # res = ErrorCode.Repeat
+ if not analytical and res == StateCode.Success and not np.isclose(particles.dt, pdt_prekernels):
+ res = OperationCode.Repeat
+
+ except FieldOutOfBoundError as fse_xy:
+ res = ErrorCode.ErrorOutOfBounds
+ particles.exception = fse_xy
+ except FieldOutOfBoundSurfaceError as fse_z:
+ res = ErrorCode.ErrorThroughSurface
+ particles.exception = fse_z
+ except TimeExtrapolationError as fse_t:
+ res = ErrorCode.ErrorTimeExtrapolation
+ particles.exception = fse_t
+ except Exception as e:
+ res = ErrorCode.Error
+ particles.exception = e
+
+ # Handle particle time and time loop
+ # if res in [ErrorCode.Success, ErrorCode.Delete]:
+ if res in [StateCode.Success, OperationCode.Delete]:
+ # Update time and repeat
+                    particles.time += particles.dt
+ particles.update_next_dt()
+ if analytical:
+ particles.dt = np.inf
+ dt_pos = min(abs(particles.dt), abs(endtime - particles.time))
+
+ sign_end_part = np.sign(endtime - particles.time)
+ # if res != ErrorCode.Delete and not np.isclose(dt_pos, 0) and (sign_end_part == sign_dt):
+ # res = ErrorCode.Evaluate
+ if res != OperationCode.Delete and not np.isclose(dt_pos, 0) and (sign_end_part == sign_dt):
+ res = StateCode.Evaluate
+ if sign_end_part != sign_dt:
+ dt_pos = 0
+
+                    particles.set_state(res)
+ if np.isclose(dt, 0):
+ break
+ else:
+ # p.state = res
+ particles.set_state(res)
+ # Try again without time update
+ for var in ptype.variables:
+ if var.name not in ['dt', 'state']:
+ setattr(particles, var.name, p_var_back[var.name])
+ dt_pos = min(abs(particles.dt), abs(endtime - particles.time))
+
+ sign_end_part = np.sign(endtime - particles.time)
+ if sign_end_part != sign_dt:
+ dt_pos = 0
+ break
+ self._compute_timings.stop_timing()
+ self._compute_timings.accumulate_timing()
+
+ self._io_timings.advance_iteration()
+ self._mem_io_timings.advance_iteration()
+ self._compute_timings.advance_iteration()
+
+ def remove_deleted(self, pset, output_file, endtime):
+ """Utility to remove all particles that signalled deletion"""
+ self._mem_io_timings.start_timing()
+ super(Kernel_Benchmark, self).remove_deleted(pset=pset, output_file=output_file, endtime=endtime)
+ self._mem_io_timings.stop_timing()
+ self._mem_io_timings.accumulate_timing()
+ self._mem_io_timings.advance_iteration()
+
+ def merge(self, kernel):
+ funcname = self.funcname + kernel.funcname
+ func_ast = FunctionDef(name=funcname, args=self.py_ast.args,
+ body=self.py_ast.body + kernel.py_ast.body,
+ decorator_list=[], lineno=1, col_offset=0)
+ delete_cfiles = self.delete_cfiles and kernel.delete_cfiles
+ return Kernel_Benchmark(self.fieldset, self.ptype, pyfunc=None,
+ funcname=funcname, funccode=self.funccode + kernel.funccode,
+ py_ast=func_ast, funcvars=self.funcvars + kernel.funcvars,
+ delete_cfiles=delete_cfiles)
+
+ def __add__(self, kernel):
+ if not isinstance(kernel, Kernel):
+ kernel = Kernel_Benchmark(self.fieldset, self.ptype, pyfunc=kernel)
+ return self.merge(kernel)
+
+ def __radd__(self, kernel):
+ if not isinstance(kernel, Kernel):
+ kernel = Kernel_Benchmark(self.fieldset, self.ptype, pyfunc=kernel)
+ return kernel.merge(self)
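+
+
+# Minimal usage sketch (illustrative only; assumes an existing `fieldset` object):
+#
+#     from parcels import AdvectionRK4, JITParticle
+#     from parcels.particleset_benchmark import ParticleSet_Benchmark
+#
+#     pset = ParticleSet_Benchmark(fieldset=fieldset, pclass=JITParticle, lon=[0.], lat=[0.])
+#     kernel = pset.Kernel(AdvectionRK4)  # returns a Kernel_Benchmark instance
+#     pset.execute(kernel, runtime=3600., dt=60.)
+#     print(kernel.compute_timings.sum(), kernel.io_timings.sum(), kernel.mem_io_timings.sum())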
diff --git a/parcels/particleset.py b/parcels/particleset.py
index ef214659be..79538a98a8 100644
--- a/parcels/particleset.py
+++ b/parcels/particleset.py
@@ -135,7 +135,9 @@ def __init__(self, fieldset=None, pclass=JITParticle, lon=None, lat=None, depth=
def convert_to_array(var):
# Convert lists and single integers/floats to one-dimensional numpy arrays
- if isinstance(var, np.ndarray):
+ if var is None:
+ return np.array([0])
+ elif isinstance(var, np.ndarray):
return var.flatten()
elif isinstance(var, (int, float, np.float32, np.int32)):
return np.array([var])
@@ -156,7 +158,7 @@ def convert_to_array(var):
assert lon.size == lat.size and lon.size == depth.size, (
'lon, lat, depth don''t all have the same lenghts')
- time = convert_to_array(time)
+ time = convert_to_array(time) # .astype(np.float64)
time = np.repeat(time, lon.size) if time.size == 1 else time
def _convert_to_reltime(time):
@@ -597,6 +599,9 @@ def __iadd__(self, particles):
self.add(particles)
return self
+ def _create_progressbar_(self, starttime, endtime):
+ return self.__create_progressbar(starttime, endtime)
+
def __create_progressbar(self, starttime, endtime):
pbar = None
try:
diff --git a/parcels/particleset_benchmark.py b/parcels/particleset_benchmark.py
new file mode 100644
index 0000000000..a2e7aa4f06
--- /dev/null
+++ b/parcels/particleset_benchmark.py
@@ -0,0 +1,496 @@
+import time as time_module
+from datetime import datetime
+from datetime import timedelta as delta
+import psutil
+import os
+from platform import system as system_name
+import matplotlib.pyplot as plt
+import sys
+
+
+import numpy as np
+
+try:
+ from mpi4py import MPI
+except ImportError:
+ MPI = None
+
+from parcels.compiler import GNUCompiler
+from parcels.kernels.advection import AdvectionRK4
+from parcels.particleset import ParticleSet
+from parcels.particle import JITParticle
+from parcels.kernel_benchmark import Kernel_Benchmark, Kernel
+from parcels.tools.loggers import logger
+from parcels.tools.performance_logger import TimingLog, ParamLogging, Asynchronous_ParamLogging
+
+from resource import getrusage, RUSAGE_SELF
+
+__all__ = ['ParticleSet_Benchmark']
+
+def measure_mem():
+ process = psutil.Process(os.getpid())
+ pmem = process.memory_info()
+ pmem_total = pmem.shared + pmem.text + pmem.data + pmem.lib
+ # print("psutil - res-set: {}; res-shr: {} res-text: {}, res-data: {}, res-lib: {}; res-total: {}".format(pmem.rss, pmem.shared, pmem.text, pmem.data, pmem.lib, pmem_total))
+ return pmem_total
+
+def measure_mem_rss():
+ process = psutil.Process(os.getpid())
+ pmem = process.memory_info()
+ pmem_total = pmem.shared + pmem.text + pmem.data + pmem.lib
+ # print("psutil - res-set: {}; res-shr: {} res-text: {}, res-data: {}, res-lib: {}; res-total: {}".format(pmem.rss, pmem.shared, pmem.text, pmem.data, pmem.lib, pmem_total))
+ return pmem.rss
+
+def measure_mem_usage():
+ rsc = getrusage(RUSAGE_SELF)
+ print("RUSAGE - Max. RES set-size: {}; shr. mem size: {}; ushr. mem size: {}".format(rsc.ru_maxrss, rsc.ru_ixrss, rsc.ru_idrss))
+ if system_name() == "Linux":
+ return rsc.ru_maxrss*1024
+ return rsc.ru_maxrss
+
+USE_ASYNC_MEMLOG = False
+USE_RUSE_SYNC_MEMLOG = False # can be faulty
+
+class ParticleSet_Benchmark(ParticleSet):
+
+ def __init__(self, fieldset, pclass=JITParticle, lon=None, lat=None, depth=None, time=None, repeatdt=None,
+ lonlatdepth_dtype=None, pid_orig=None, **kwargs):
+ super(ParticleSet_Benchmark, self).__init__(fieldset, pclass, lon, lat, depth, time, repeatdt, lonlatdepth_dtype, pid_orig, **kwargs)
+ self.total_log = TimingLog()
+ self.compute_log = TimingLog()
+ self.io_log = TimingLog()
+ self.mem_io_log = TimingLog()
+ self.plot_log = TimingLog()
+ self.nparticle_log = ParamLogging()
+ self.mem_log = ParamLogging()
+ self.async_mem_log = Asynchronous_ParamLogging()
+ self.process = psutil.Process(os.getpid())
+
+ def set_async_memlog_interval(self, interval):
+ self.async_mem_log.measure_interval = interval
+
+ # @profile
+ def execute(self, pyfunc=AdvectionRK4, endtime=None, runtime=None, dt=1.,
+ moviedt=None, recovery=None, output_file=None, movie_background_field=None,
+ verbose_progress=None, postIterationCallbacks=None, callbackdt=None):
+ """Execute a given kernel function over the particle set for
+ multiple timesteps. Optionally also provide sub-timestepping
+ for particle output.
+
+ :param pyfunc: Kernel function to execute. This can be the name of a
+ defined Python function or a :class:`parcels.kernel.Kernel` object.
+ Kernels can be concatenated using the + operator
+ :param endtime: End time for the timestepping loop.
+ It is either a datetime object or a positive double.
+ :param runtime: Length of the timestepping loop. Use instead of endtime.
+ It is either a timedelta object or a positive double.
+ :param dt: Timestep interval to be passed to the kernel.
+ It is either a timedelta object or a double.
+ Use a negative value for a backward-in-time simulation.
+ :param moviedt: Interval for inner sub-timestepping (leap), which dictates
+ the update frequency of animation.
+ It is either a timedelta object or a positive double.
+ None value means no animation.
+ :param output_file: :mod:`parcels.particlefile.ParticleFile` object for particle output
+ :param recovery: Dictionary with additional `:mod:parcels.tools.error`
+ recovery kernels to allow custom recovery behaviour in case of
+ kernel errors.
+ :param movie_background_field: field plotted as background in the movie if moviedt is set.
+ 'vector' shows the velocity as a vector field.
+ :param verbose_progress: Boolean for providing a progress bar for the kernel execution loop.
+ :param postIterationCallbacks: (Optional) Array of functions that are to be called after each
+ iteration (post-process, non-Kernel)
+        :param callbackdt: (Optional, in conjunction with 'postIterationCallbacks') timestep interval
+                           at which, at the latest, to interrupt the running kernel and invoke the
+                           post-iteration callbacks from 'postIterationCallbacks'.
+ """
+
+ # check if pyfunc has changed since last compile. If so, recompile
+ if self.kernel is None or (self.kernel.pyfunc is not pyfunc and self.kernel is not pyfunc):
+ # Generate and store Kernel
+ if isinstance(pyfunc, Kernel):
+ self.kernel = pyfunc
+ else:
+ self.kernel = self.Kernel(pyfunc)
+ # Prepare JIT kernel execution
+ if self.ptype.uses_jit:
+ self.kernel.remove_lib()
+ cppargs = ['-DDOUBLE_COORD_VARIABLES'] if self.lonlatdepth_dtype == np.float64 else None
+ self.kernel.compile(compiler=GNUCompiler(cppargs=cppargs))
+ self.kernel.load_lib()
+
+ # Convert all time variables to seconds
+ if isinstance(endtime, delta):
+ raise RuntimeError('endtime must be either a datetime or a double')
+ if isinstance(endtime, datetime):
+ endtime = np.datetime64(endtime)
+ if isinstance(endtime, np.datetime64):
+ if self.time_origin.calendar is None:
+ raise NotImplementedError('If fieldset.time_origin is not a date, execution endtime must be a double')
+ endtime = self.time_origin.reltime(endtime)
+ if isinstance(runtime, delta):
+ runtime = runtime.total_seconds()
+ if isinstance(dt, delta):
+ dt = dt.total_seconds()
+ outputdt = output_file.outputdt if output_file else np.infty
+ if isinstance(outputdt, delta):
+ outputdt = outputdt.total_seconds()
+ if isinstance(moviedt, delta):
+ moviedt = moviedt.total_seconds()
+ if isinstance(callbackdt, delta):
+ callbackdt = callbackdt.total_seconds()
+
+ assert runtime is None or runtime >= 0, 'runtime must be positive'
+ assert outputdt is None or outputdt >= 0, 'outputdt must be positive'
+ assert moviedt is None or moviedt >= 0, 'moviedt must be positive'
+
+ # Set particle.time defaults based on sign of dt, if not set at ParticleSet construction
+ mintime, maxtime = self.fieldset.gridset.dimrange('time_full') if self.fieldset is not None else (0, 1)
+ if np.any(np.isnan(self.particle_data['time'])):
+ self.particle_data['time'][np.isnan(self.particle_data['time'])] = mintime if dt >= 0 else maxtime
+
+ # for p in self:
+ # if np.isnan(p.time):
+ # mintime, maxtime = self.fieldset.gridset.dimrange('time_full')
+ # p.time = mintime if dt >= 0 else maxtime
+
+ # Derive _starttime and endtime from arguments or fieldset defaults
+ if runtime is not None and endtime is not None:
+ raise RuntimeError('Only one of (endtime, runtime) can be specified')
+ # ====================================== #
+ # ==== EXPENSIVE LIST COMPREHENSION ==== #
+ # ====================================== #
+ # _starttime = min([p.time for p in self]) if dt >= 0 else max([p.time for p in self])
+ _starttime = self.particle_data['time'].min() if dt >= 0 else self.particle_data['time'].max()
+ if self.repeatdt is not None and self.repeat_starttime is None:
+ self.repeat_starttime = _starttime
+ if runtime is not None:
+ endtime = _starttime + runtime * np.sign(dt)
+ elif endtime is None:
+ mintime, maxtime = self.fieldset.gridset.dimrange('time_full')
+ endtime = maxtime if dt >= 0 else mintime
+
+ execute_once = False
+ if abs(endtime-_starttime) < 1e-5 or dt == 0 or runtime == 0:
+ dt = 0
+ runtime = 0
+ endtime = _starttime
+ logger.warning_once("dt or runtime are zero, or endtime is equal to Particle.time. "
+ "The kernels will be executed once, without incrementing time")
+ execute_once = True
+
+ # Initialise particle timestepping
+ self.particle_data['dt'][:] = dt
+ # for p in self:
+ # p.dt = dt
+
+ # First write output_file, because particles could have been added
+ if output_file:
+ output_file.write(self, _starttime)
+ if moviedt:
+ self.show(field=movie_background_field, show_time=_starttime, animation=True)
+
+        if moviedt is None:
+            moviedt = np.infty
+        if callbackdt is None:
+            interrupt_dts = [np.infty, moviedt, outputdt]
+            if self.repeatdt is not None:
+                interrupt_dts.append(self.repeatdt)
+            callbackdt = np.min(np.array(interrupt_dts))
+ time = _starttime
+ if self.repeatdt:
+ next_prelease = self.repeat_starttime + (abs(time - self.repeat_starttime) // self.repeatdt + 1) * self.repeatdt * np.sign(dt)
+ else:
+ next_prelease = np.infty if dt > 0 else - np.infty
+ next_output = time + outputdt if dt > 0 else time - outputdt
+ next_movie = time + moviedt if dt > 0 else time - moviedt
+ next_callback = time + callbackdt if dt > 0 else time - callbackdt
+ # next_input = self.fieldset.computeTimeChunk(time, np.sign(dt))
+ next_input = self.fieldset.computeTimeChunk(time, np.sign(dt)) if self.fieldset is not None else np.inf
+
+ tol = 1e-12
+ walltime_start = None
+ if verbose_progress is None:
+ walltime_start = time_module.time()
+ pbar = None
+ if verbose_progress:
+ pbar = self._create_progressbar_(_starttime, endtime)
+
+ mem_used_start = 0
+ if USE_ASYNC_MEMLOG:
+ self.async_mem_log.measure_func = measure_mem
+ mem_used_start = measure_mem()
+
+ while (time < endtime and dt > 0) or (time > endtime and dt < 0) or dt == 0:
+ self.total_log.start_timing()
+ if USE_ASYNC_MEMLOG:
+ self.async_mem_log.measure_start_value = mem_used_start
+ self.async_mem_log.start_partial_measurement()
+ if verbose_progress is None and time_module.time() - walltime_start > 10:
+ # Showing progressbar if runtime > 10 seconds
+ if output_file:
+ logger.info('Temporary output files are stored in %s.' % output_file.tempwritedir_base)
+ logger.info('You can use "parcels_convert_npydir_to_netcdf %s" to convert these '
+ 'to a NetCDF file during the run.' % output_file.tempwritedir_base)
+ pbar = self._create_progressbar_(_starttime, endtime)
+ verbose_progress = True
+ if dt > 0:
+ time = min(next_prelease, next_input, next_output, next_movie, next_callback, endtime)
+ else:
+ time = max(next_prelease, next_input, next_output, next_movie, next_callback, endtime)
+ # ==== compute ==== #
+ if not isinstance(self.kernel, Kernel_Benchmark):
+ self.compute_log.start_timing()
+ self.kernel.execute(self, endtime=time, dt=dt, recovery=recovery, output_file=output_file, execute_once=execute_once)
+ if abs(time-next_prelease) < tol:
+ # creating new particles equals a memory-io operation
+ if not isinstance(self.kernel, Kernel_Benchmark):
+ self.compute_log.stop_timing()
+ self.compute_log.accumulate_timing()
+
+ self.mem_io_log.start_timing()
+ pset_new = ParticleSet(fieldset=self.fieldset, time=time, lon=self.repeatlon,
+ lat=self.repeatlat, depth=self.repeatdepth,
+ pclass=self.repeatpclass, lonlatdepth_dtype=self.lonlatdepth_dtype,
+ partitions=False, pid_orig=self.repeatpid, **self.repeatkwargs)
+ # p = pset_new.data_accessor()
+ # for i in range(pset_new.size):
+ # p.set_index(i)
+ # p.dt = dt
+ pset_new.particle_data['dt'][:] = dt
+ # for p in pset_new:
+ # p.dt = dt
+ self.add(pset_new)
+ self.mem_io_log.stop_timing()
+ self.mem_io_log.accumulate_timing()
+ next_prelease += self.repeatdt * np.sign(dt)
+ else:
+ if not isinstance(self.kernel, Kernel_Benchmark):
+ self.compute_log.stop_timing()
+ else:
+ pass
+ if isinstance(self.kernel, Kernel_Benchmark):
+ self.compute_log.add_aux_measure(self.kernel.compute_timings.sum())
+ self.kernel.compute_timings.reset()
+ self.io_log.add_aux_measure(self.kernel.io_timings.sum())
+ self.kernel.io_timings.reset()
+ self.mem_io_log.add_aux_measure(self.kernel.mem_io_timings.sum())
+ self.kernel.mem_io_timings.reset()
+ self.compute_log.accumulate_timing()
+ self.nparticle_log.advance_iteration(self.size)
+ # ==== end compute ==== #
+ if abs(time-next_output) < tol: # ==== IO ==== #
+ if output_file:
+ self.io_log.start_timing()
+ output_file.write(self, time)
+ self.io_log.stop_timing()
+ self.io_log.accumulate_timing()
+ next_output += outputdt * np.sign(dt)
+ if abs(time-next_movie) < tol: # ==== Plotting ==== #
+ self.plot_log.start_timing()
+ self.show(field=movie_background_field, show_time=time, animation=True)
+ self.plot_log.stop_timing()
+ self.plot_log.accumulate_timing()
+ next_movie += moviedt * np.sign(dt)
+ # ==== insert post-process here to also allow for memory clean-up via external func ==== #
+ if abs(time-next_callback) < tol:
+                # ==== assuming post-processing functions mostly use memory rather than heavy computation ... ==== #
+ self.mem_io_log.start_timing()
+ if postIterationCallbacks is not None:
+ for extFunc in postIterationCallbacks:
+ extFunc()
+ self.mem_io_log.stop_timing()
+ self.mem_io_log.accumulate_timing()
+ next_callback += callbackdt * np.sign(dt)
+ if time != endtime: # ==== IO ==== #
+ self.io_log.start_timing()
+ next_input = self.fieldset.computeTimeChunk(time, dt)
+ self.io_log.stop_timing()
+ self.io_log.accumulate_timing()
+ if dt == 0:
+ break
+ if verbose_progress: # ==== Plotting ==== #
+ self.plot_log.start_timing()
+ pbar.update(abs(time - _starttime))
+ self.plot_log.stop_timing()
+ self.plot_log.accumulate_timing()
+ self.total_log.stop_timing()
+ self.total_log.accumulate_timing()
+ # if MPI:
+ # mpi_comm = MPI.COMM_WORLD
+ # mem_B_used = self.process.memory_info().rss
+ # mem_B_used_total = mpi_comm.reduce(mem_B_used, op=MPI.SUM, root=0)
+ # else:
+ # mem_B_used_total = self.process.memory_info().rss
+ # mem_B_used_total = self.process.memory_info().rss
+ mem_B_used_total = 0
+ if USE_RUSE_SYNC_MEMLOG:
+ mem_B_used_total = measure_mem_usage()
+ else:
+ mem_B_used_total = measure_mem_rss()
+ self.mem_log.advance_iteration(mem_B_used_total)
+ if USE_ASYNC_MEMLOG:
+ self.async_mem_log.stop_partial_measurement() # does 'advance_iteration' internally
+
+ self.compute_log.advance_iteration()
+ self.io_log.advance_iteration()
+ self.mem_io_log.advance_iteration()
+ self.plot_log.advance_iteration()
+ self.total_log.advance_iteration()
+
+ if output_file:
+ self.io_log.start_timing()
+ output_file.write(self, time)
+ self.io_log.stop_timing()
+ self.io_log.accumulate_timing()
+ if verbose_progress:
+ self.plot_log.start_timing()
+ pbar.finish()
+ self.plot_log.stop_timing()
+ self.plot_log.accumulate_timing()
+
+ # self.nparticle_log.advance_iteration(self.size)
+ # self.compute_log.advance_iteration()
+ # self.io_log.advance_iteration()
+ # self.mem_log.advance_iteration(self.process.memory_info().rss)
+ # self.mem_io_log.advance_iteration()
+ # self.plot_log.advance_iteration()
+ # self.total_log.advance_iteration()
+
+ def Kernel(self, pyfunc, c_include="", delete_cfiles=True):
+ """Wrapper method to convert a `pyfunc` into a :class:`parcels.kernel_benchmark.Kernel` object
+ based on `fieldset` and `ptype` of the ParticleSet
+ :param delete_cfiles: Boolean whether to delete the C-files after compilation in JIT mode (default is True)
+ """
+ return Kernel_Benchmark(self.fieldset, self.ptype, pyfunc=pyfunc, c_include=c_include,
+ delete_cfiles=delete_cfiles)
+
+ def plot_and_log(self, total_times=None, compute_times=None, io_times=None, plot_times=None, memory_used=None, nparticles=None, target_N=1, imageFilePath="", odir=os.getcwd(), xlim_range=None, ylim_range=None):
+        # == collect the timing and parameter logs; fall back to the internal logs if no arrays are provided == #
+ if total_times is None or type(total_times) not in [list, dict, np.ndarray]:
+ total_times = self.total_log.get_values()
+ if not isinstance(total_times, np.ndarray):
+ total_times = np.array(total_times)
+ if compute_times is None or type(compute_times) not in [list, dict, np.ndarray]:
+ compute_times = self.compute_log.get_values()
+ if not isinstance(compute_times, np.ndarray):
+ compute_times = np.array(compute_times)
+ mem_io_times = None
+ if io_times is None or type(io_times) not in [list, dict, np.ndarray]:
+ io_times = self.io_log.get_values()
+ mem_io_times = self.mem_io_log.get_values()
+ if not isinstance(io_times, np.ndarray):
+ io_times = np.array(io_times)
+ if mem_io_times is not None:
+ mem_io_times = np.array(mem_io_times)
+ io_times += mem_io_times
+ if plot_times is None or type(plot_times) not in [list, dict, np.ndarray]:
+ plot_times = self.plot_log.get_values()
+ if not isinstance(plot_times, np.ndarray):
+ plot_times = np.array(plot_times)
+ if memory_used is None or type(memory_used) not in [list, dict, np.ndarray]:
+ memory_used = self.mem_log.get_params()
+ if not isinstance(memory_used, np.ndarray):
+ memory_used = np.array(memory_used)
+ if nparticles is None or type(nparticles) not in [list, dict, np.ndarray]:
+ nparticles = []
+ if not isinstance(nparticles, np.ndarray):
+ nparticles = np.array(nparticles, dtype=np.int32)
+
+ memory_used_async = None
+ if USE_ASYNC_MEMLOG:
+ memory_used_async = np.array(self.async_mem_log.get_params(), dtype=np.int64)
+
+ t_scaler = 1. * 10./1.0
+ npart_scaler = 1.0 / 1000.0
+ mem_scaler = 1.0 / (1024 * 1024 * 1024)
+ plot_t = (total_times * t_scaler).tolist()
+ plot_ct = (compute_times * t_scaler).tolist()
+ plot_iot = (io_times * t_scaler).tolist()
+ plot_drawt = (plot_times * t_scaler).tolist()
+ plot_npart = (nparticles * npart_scaler).tolist()
+ plot_mem = []
+ if memory_used is not None and len(memory_used) > 1:
+ plot_mem = (memory_used * mem_scaler).tolist()
+
+ plot_mem_async = None
+ if USE_ASYNC_MEMLOG:
+ plot_mem_async = (memory_used_async * mem_scaler).tolist()
+
+ do_iot_plot = True
+ do_drawt_plot = False
+ do_mem_plot = True
+ do_mem_plot_async = True
+ do_npart_plot = True
+ assert (len(plot_t) == len(plot_ct))
+ if len(plot_t) != len(plot_iot):
+ print("plot_t and plot_iot have different lengths ({} vs {})".format(len(plot_t), len(plot_iot)))
+ do_iot_plot = False
+ if len(plot_t) != len(plot_drawt):
+ print("plot_t and plot_drawt have different lengths ({} vs {})".format(len(plot_t), len(plot_iot)))
+ do_drawt_plot = False
+ if len(plot_t) != len(plot_mem):
+ print("plot_t and plot_mem have different lengths ({} vs {})".format(len(plot_t), len(plot_mem)))
+ do_mem_plot = False
+ if len(plot_t) != len(plot_npart):
+ print("plot_t and plot_npart have different lengths ({} vs {})".format(len(plot_t), len(plot_npart)))
+ do_npart_plot = False
+ x = np.arange(start=0, stop=len(plot_t))
+
+ fig, ax = plt.subplots(1, 1, figsize=(21, 12))
+ ax.plot(x, plot_t, 's-', label="total time_spent [100ms]")
+ ax.plot(x, plot_ct, 'o-', label="compute-time spent [100ms]")
+ if do_iot_plot:
+ ax.plot(x, plot_iot, 'o-', label="io-time spent [100ms]")
+ if do_drawt_plot:
+ ax.plot(x, plot_drawt, 'o-', label="draw-time spent [100ms]")
+ if (memory_used is not None) and do_mem_plot:
+ ax.plot(x, plot_mem, '.-', label="memory_used (cumulative) [1 GB]")
+ if USE_ASYNC_MEMLOG:
+ if (memory_used_async is not None) and do_mem_plot_async:
+ ax.plot(x, plot_mem_async, 'x-', label="memory_used [async] (cum.) [1GB]")
+ if do_npart_plot:
+ ax.plot(x, plot_npart, '-', label="sim. particles [# 1000]")
+ if xlim_range is not None:
+ plt.xlim(list(xlim_range)) # [0, 730]
+ if ylim_range is not None:
+ plt.ylim(list(ylim_range)) # [0, 120]
+ plt.legend()
+ ax.set_xlabel('iteration')
+ plt.savefig(os.path.join(odir, imageFilePath), dpi=600, format='png')
+
+ sys.stdout.write("cumulative total runtime: {}\n".format(total_times.sum()))
+ sys.stdout.write("cumulative compute time: {}\n".format(compute_times.sum()))
+ sys.stdout.write("cumulative I/O time: {}\n".format(io_times.sum()))
+ sys.stdout.write("cumulative plot time: {}\n".format(plot_times.sum()))
+
+ csv_file = os.path.splitext(imageFilePath)[0]+".csv"
+ with open(os.path.join(odir, csv_file), 'w') as f:
+ nparticles_t0 = 0
+ nparticles_tN = 0
+            if nparticles is not None and len(nparticles) > 0:
+ nparticles_t0 = nparticles[0]
+ nparticles_tN = nparticles[-1]
+ ncores = 1
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ ncores = mpi_comm.Get_size()
+            header_string = "target_N, start_N, final_N, avg_N, ncores, avg_kt_total[s], avg_kt_compute[s], avg_kt_io[s], avg_kt_plot[s], cum_t_total[s], cum_t_compute[s], cum_t_io[s], cum_t_plot[s], max_mem[MB]\n"
+ f.write(header_string)
+ data_string = "{}, {}, {}, {}, {}, ".format(target_N, nparticles_t0, nparticles_tN, nparticles.mean(), ncores)
+ data_string += "{:2.10f}, {:2.10f}, {:2.10f}, {:2.10f}, ".format(total_times.mean(), compute_times.mean(), io_times.mean(), plot_times.mean())
+ max_mem_sync = 0
+ if memory_used is not None and len(memory_used) > 1:
+ memory_used = np.floor(memory_used / (1024*1024))
+ memory_used = memory_used.astype(dtype=np.uint32)
+ max_mem_sync = memory_used.max()
+ max_mem_async = 0
+ if USE_ASYNC_MEMLOG:
+ if memory_used_async is not None and len(memory_used_async) > 1:
+ memory_used_async = np.floor(memory_used_async / (1024*1024))
+ memory_used_async = memory_used_async.astype(dtype=np.int64)
+ max_mem_async = memory_used_async.max()
+ max_mem = max(max_mem_sync, max_mem_async)
+ data_string += "{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {}".format(total_times.sum(), compute_times.sum(), io_times.sum(), plot_times.sum(), max_mem)
+ f.write(data_string)
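+
+
+# Minimal usage sketch for the benchmark logging (illustrative only; `fieldset`, `lon` and `lat`
+# are assumed to exist):
+#
+#     pset = ParticleSet_Benchmark(fieldset=fieldset, pclass=JITParticle, lon=lon, lat=lat)
+#     pset.execute(AdvectionRK4, runtime=delta(days=1), dt=delta(minutes=5))
+#     pset.plot_and_log(nparticles=pset.nparticle_log.get_params(), target_N=len(pset),
+#                       imageFilePath="benchmark.png", odir="/tmp")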
diff --git a/parcels/tools/__init__.py b/parcels/tools/__init__.py
index 8e847fee26..0ab10cddd9 100644
--- a/parcels/tools/__init__.py
+++ b/parcels/tools/__init__.py
@@ -3,3 +3,4 @@
from .interpolation_utils import * # noqa
from .loggers import * # noqa
from .timer import * # noqa
+# from .performance_logger import * # noqa
diff --git a/parcels/tools/performance_logger.py b/parcels/tools/performance_logger.py
new file mode 100644
index 0000000000..8d51c28378
--- /dev/null
+++ b/parcels/tools/performance_logger.py
@@ -0,0 +1,322 @@
+import time as time_module
+import numpy as np
+
+try:
+ from mpi4py import MPI
+except ImportError:
+ MPI = None
+
+from threading import Thread
+from threading import Event
+from time import sleep
+
+class TimingLog():
+ stime = 0
+ etime = 0
+ mtime = 0
+ _samples = []
+ _times_steps = []
+ _iter = 0
+
+ def __init__(self):
+ self.stime = 0
+ self.etime = 0
+ self.mtime = 0
+ self._samples = []
+ self._times_steps = []
+ self._iter = 0
+
+ @property
+ def timing(self):
+ return self._times_steps
+
+ @property
+ def samples(self):
+ return self._samples
+
+ def __len__(self):
+ return len(self._samples)
+
+ def get_values(self):
+ return self._times_steps
+
+ def get_value(self, index):
+ N = len(self._times_steps)
+ result = 0
+ if N > 0:
+ result = self._times_steps[min(max(index, 0), N - 1)]
+ return result
+
+ def start_timing(self):
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank == 0:
+ # self.stime = MPI.Wtime()
+ # self.stime = time_module.perf_counter()
+ self.stime = time_module.process_time()
+ else:
+ self.stime = time_module.process_time()
+
+ def stop_timing(self):
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank == 0:
+ # self.etime = MPI.Wtime()
+ # self.etime = time_module.perf_counter()
+ self.etime = time_module.process_time()
+ else:
+ self.etime = time_module.process_time()
+
+ def accumulate_timing(self):
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank == 0:
+ self.mtime += (self.etime-self.stime)
+ else:
+ self.mtime = 0
+ else:
+ self.mtime += (self.etime-self.stime)
+
+ def advance_iteration(self):
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank == 0:
+ self._times_steps.append(self.mtime)
+ self._samples.append(self._iter)
+ self._iter += 1
+ self.mtime = 0
+ else:
+ self._times_steps.append(self.mtime)
+ self._samples.append(self._iter)
+ self._iter += 1
+ self.mtime = 0
+
+ def add_aux_measure(self, value):
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank == 0:
+ self.mtime += value
+ else:
+ self.mtime += 0
+ else:
+ self.mtime += value
+
+ def sum(self):
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank == 0:
+ result = np.array(self._times_steps).sum()
+ return result
+ else:
+ return 0
+ else:
+            result = np.array(self._times_steps).sum()
+            return result
+
+ def reset(self):
+ if self._times_steps is not None and len(self._times_steps) > 0:
+ del self._times_steps[:]
+ if self._samples is not None and len(self._samples) > 0:
+ del self._samples[:]
+ self.stime = 0
+ self.etime = 0
+ self.mtime = 0
+ self._samples = []
+ self._times_steps = []
+ self._iter = 0
+
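+# Minimal usage sketch for TimingLog (illustrative; single-process case without MPI):
+#
+#     log = TimingLog()
+#     for _ in range(3):
+#         log.start_timing()
+#         _ = sum(range(100000))   # some timed work
+#         log.stop_timing()
+#         log.accumulate_timing()
+#         log.advance_iteration()  # appends one accumulated sample per iteration
+#     total = log.sum()
+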
+
+class ParamLogging():
+ _samples = []
+ _params = []
+ _iter = 0
+
+ def __init__(self):
+ self._samples = []
+ self._params = []
+ self._iter = 0
+
+ @property
+ def samples(self):
+ return self._samples
+
+ @property
+ def params(self):
+ return self._params
+
+ def get_params(self):
+ return self._params
+
+ def get_param(self, index):
+ N = len(self._params)
+ result = 0
+ if N > 0:
+ result = self._params[min(max(index, 0), N-1)]
+ return result
+
+ def __len__(self):
+ return len(self._samples)
+
+ def advance_iteration(self, param):
+ if MPI:
+ # mpi_comm = MPI.COMM_WORLD
+ # mpi_rank = mpi_comm.Get_rank()
+
+ self._params.append(param)
+ self._samples.append(self._iter)
+ self._iter += 1
+ # if mpi_rank == 0:
+ # self._params.append(param)
+ # self._samples.append(self._iter)
+ # self._iter += 1
+ else:
+ self._params.append(param)
+ self._samples.append(self._iter)
+ self._iter += 1
+
+class Asynchronous_ParamLogging():
+ _samples = []
+ _params = []
+ _iter = 0
+ _measure_func = None
+ _measure_start_value = None # for differential measurements
+ _measure_partial_values = []
+ _measure_interval = 0.25 # 250 ms
+ _event = None
+ _thread = None
+ differential_measurement = False
+
+ def __init__(self):
+ self._samples = []
+ self._params = []
+ self._measure_partial_values = []
+ self._iter = 0
+ self._measure_func = None
+ self._measure_start_value = None
+ self._measure_interval = 0.25 # 250 ms
+ self._event = None
+ self._thread = None
+ self.differential_measurement = False
+
+ def __del__(self):
+ del self._samples[:]
+ del self._params[:]
+ del self._measure_partial_values[:]
+
+ @property
+ def samples(self):
+ return self._samples
+
+ @property
+ def params(self):
+ return self._params
+
+ @property
+ def measure_func(self):
+ return self._measure_func
+
+ @measure_func.setter
+ def measure_func(self, function):
+ self._measure_func = function
+
+ @property
+ def measure_interval(self):
+ return self._measure_interval
+
+ @measure_interval.setter
+ def measure_interval(self, interval):
+ """
+ Set measure interval in seconds
+ :param interval: interval in seconds (fractional possible)
+ :return: None
+ """
+ self._measure_interval = interval
+
+ @property
+ def measure_start_value(self):
+ return self._measure_start_value
+
+ @measure_start_value.setter
+ def measure_start_value(self, value):
+ self._measure_start_value = value
+
+ def async_run(self):
+ if self.differential_measurement:
+ self.async_run_diff_measurement()
+        else:
+            self.async_run_measurement()
+
+ def async_run_diff_measurement(self):
+ if self._measure_start_value is None:
+ self._measure_start_value = self._measure_func()
+ self._measure_partial_values.append(0)
+ while not self._event.is_set():
+ self._measure_partial_values.append( self._measure_func()-self._measure_start_value )
+ sleep(self._measure_interval)
+
+ def async_run_measurement(self):
+ while not self._event.is_set():
+ self._measure_partial_values.append( self.measure_func() )
+ sleep(self.measure_interval)
+
+ def start_partial_measurement(self):
+ assert self._measure_func is not None, "Measurement function is None - invalid. Exiting ..."
+ assert self._thread is None, "Measurement already running - double-start invalid. Exiting ..."
+ if len(self._measure_partial_values) > 0:
+ del self._measure_partial_values[:]
+ self._measure_partial_values = []
+ self._event = Event()
+        self._thread = Thread(target=self.async_run)  # dispatches to the differential or plain sampling loop
+ self._thread.start()
+
+ def stop_partial_measurement(self):
+ """
+        Stop the background measurement thread. Also advances the iteration internally,
+        logging the maximum of the partial values collected since start_partial_measurement().
+ :return: None
+ """
+ self._event.set()
+ self._thread.join()
+ sleep(self._measure_interval)
+ del self._thread
+ self._thread = None
+ self._measure_start_value = None
+ # param_partial_mean = np.array(self._measure_partial_values).mean()
+ param_partial_mean = np.array(self._measure_partial_values).max()
+ self.advance_iteration(param_partial_mean)
+
+ def get_params(self):
+ return self._params
+
+ def get_param(self, index):
+ N = len(self._params)
+ result = 0
+ if N > 0:
+ result = self._params[min(max(index, 0), N-1)]
+ return result
+
+ def __len__(self):
+ return len(self._samples)
+
+ def advance_iteration(self, param):
+ if MPI:
+ # mpi_comm = MPI.COMM_WORLD
+ # mpi_rank = mpi_comm.Get_rank()
+
+ self._params.append(param)
+ self._samples.append(self._iter)
+ self._iter += 1
+ # if mpi_rank == 0:
+ # self._params.append(param)
+ # self._samples.append(self._iter)
+ # self._iter += 1
+ else:
+ self._params.append(param)
+ self._samples.append(self._iter)
+ self._iter += 1
+
diff --git a/parcels/tools/perlin2d.py b/parcels/tools/perlin2d.py
new file mode 100644
index 0000000000..4b39b4cf94
--- /dev/null
+++ b/parcels/tools/perlin2d.py
@@ -0,0 +1,81 @@
+import numpy as np
+from numpy.random import MT19937
+from numpy.random import RandomState, SeedSequence
+from scipy import ndimage
+from time import time
+
+
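+# generate_perlin_noise_2d: classic gradient (Perlin) noise on a regular grid. 'res' is the
+# number of gradient cells per axis; each entry of 'shape' must be an integer multiple of the
+# corresponding entry of 'res'. Output values lie roughly within [-1, 1].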
+def generate_perlin_noise_2d(shape, res):
+ def f(t):
+ return 6*t**5 - 15*t**4 + 10*t**3
+
+ delta = (res[0] / shape[0], res[1] / shape[1])
+ d = (shape[0] // res[0], shape[1] // res[1])
+ grid = np.mgrid[0:res[0]:delta[0], 0:res[1]:delta[1]].transpose(1, 2, 0) % 1
+ # Gradients
+ angles = 2*np.pi*np.random.rand(res[0]+1, res[1]+1)
+ gradients = np.dstack((np.cos(angles), np.sin(angles)))
+ g00 = gradients[0:-1, 0:-1].repeat(d[0], 0).repeat(d[1], 1)
+ g10 = gradients[1:, 0:-1].repeat(d[0], 0).repeat(d[1], 1)
+ g01 = gradients[0:-1, 1:].repeat(d[0], 0).repeat(d[1], 1)
+ g11 = gradients[1:, 1:].repeat(d[0], 0).repeat(d[1], 1)
+ # Ramps
+ n00 = np.sum(np.dstack((grid[:, :, 0], grid[:, :, 1])) * g00, 2)
+ n10 = np.sum(np.dstack((grid[:, :, 0]-1, grid[:, :, 1])) * g10, 2)
+ n01 = np.sum(np.dstack((grid[:, :, 0], grid[:, :, 1]-1)) * g01, 2)
+ n11 = np.sum(np.dstack((grid[:, :, 0]-1, grid[:, :, 1]-1)) * g11, 2)
+ # Interpolation
+ t = f(grid)
+ n0 = n00*(1-t[:, :, 0]) + t[:, :, 0]*n10
+ n1 = n01*(1-t[:, :, 0]) + t[:, :, 0]*n11
+ return np.sqrt(2)*((1-t[:, :, 1])*n0 + t[:, :, 1]*n1)
+
+
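+# generate_fractal_noise_2d: sums 'octaves' Perlin layers, doubling the frequency and damping
+# the amplitude by 'persistence' per octave; 'shape' must remain divisible by frequency*res
+# for every octave.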
+def generate_fractal_noise_2d(shape, res, octaves=1, persistence=0.5):
+ RandomState(MT19937(SeedSequence(int(round(time() * 1000))))) # rs =
+ noise = np.zeros(shape)
+ frequency = 1
+ amplitude = 1
+ for m_i in range(octaves):
+ noise += amplitude * generate_perlin_noise_2d(shape, (frequency*res[0], frequency*res[1]))
+ frequency *= 2
+ amplitude *= persistence
+ return noise
+
+
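+# generate_fractal_noise_temporal2d: builds a (tsteps, ny, nx) stack by storing the current
+# noise field at every step and then shifting it by a random integer offset (cubic-spline
+# interpolation, mirrored boundaries), so the pattern drifts slowly over time.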
+def generate_fractal_noise_temporal2d(shape, tsteps, res, octaves=1, persistence=0.5, max_shift=((-1, 1), (-1, 1))):
+ RandomState(MT19937(SeedSequence(int(round(time() * 1000))))) # rs =
+ noise = np.zeros(shape)
+ frequency = 1
+ amplitude = 1
+ for m_i in range(octaves):
+ noise += amplitude * generate_perlin_noise_2d(shape, (frequency*res[0], frequency*res[1]))
+ frequency *= 2
+ amplitude *= persistence
+
+ ishape = (tsteps, )+shape
+ result = np.zeros(ishape)
+ timage = np.zeros(noise.shape)
+ for i in range(0, tsteps):
+ result[i, :, :] = noise
+ sx = np.random.randint(max_shift[0][0], max_shift[0][1], dtype=np.int32)
+ sy = np.random.randint(max_shift[1][0], max_shift[1][1], dtype=np.int32)
+ ndimage.shift(noise, (sx, sy), timage, order=3, mode='mirror')
+ noise = timage
+ return result
+
+
+if __name__ == '__main__':
+ import matplotlib.pyplot as plt
+
+ np.random.seed(0)
+ noise = generate_perlin_noise_2d((256, 256), (8, 8))
+ plt.imshow(noise, cmap='gray', interpolation='lanczos')
+ plt.colorbar()
+
+ np.random.seed(0)
+ noise = generate_fractal_noise_2d((256, 256), (8, 8), 5)
+ plt.figure()
+ plt.imshow(noise, cmap='gray', interpolation='lanczos')
+ plt.colorbar()
+ plt.show()
diff --git a/parcels/tools/perlin3d.py b/parcels/tools/perlin3d.py
new file mode 100644
index 0000000000..c24306779f
--- /dev/null
+++ b/parcels/tools/perlin3d.py
@@ -0,0 +1,92 @@
+import numpy as np
+from numpy.random import MT19937
+from numpy.random import RandomState, SeedSequence
+from scipy import ndimage
+from time import time
+
+
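+# 3D analogue of parcels/tools/perlin2d.py: gradient vectors are unit vectors built from two
+# uniform random angles per lattice node; each entry of 'shape' must be an integer multiple
+# of the corresponding entry of 'res'.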
+def generate_perlin_noise_3d(shape, res):
+ def f(t):
+ return 6*t**5 - 15*t**4 + 10*t**3
+
+ delta = (res[0] / shape[0], res[1] / shape[1], res[2] / shape[2])
+ d = (shape[0] // res[0], shape[1] // res[1], shape[2] // res[2])
+ grid = np.mgrid[0:res[0]:delta[0], 0:res[1]:delta[1], 0:res[2]:delta[2]]
+ grid = grid.transpose(1, 2, 3, 0) % 1
+ # Gradients
+ theta = 2*np.pi*np.random.rand(res[0]+1, res[1]+1, res[2]+1)
+ phi = 2*np.pi*np.random.rand(res[0]+1, res[1]+1, res[2]+1)
+ gradients = np.stack((np.sin(phi)*np.cos(theta), np.sin(phi)*np.sin(theta), np.cos(phi)), axis=3)
+ g000 = gradients[0:-1, 0:-1, 0:-1].repeat(d[0], 0).repeat(d[1], 1).repeat(d[2], 2)
+ g100 = gradients[1:, 0:-1, 0:-1].repeat(d[0], 0).repeat(d[1], 1).repeat(d[2], 2)
+ g010 = gradients[0:-1, 1:, 0:-1].repeat(d[0], 0).repeat(d[1], 1).repeat(d[2], 2)
+ g110 = gradients[1:, 1:, 0:-1].repeat(d[0], 0).repeat(d[1], 1).repeat(d[2], 2)
+ g001 = gradients[0:-1, 0:-1, 1:].repeat(d[0], 0).repeat(d[1], 1).repeat(d[2], 2)
+ g101 = gradients[1:, 0:-1, 1:].repeat(d[0], 0).repeat(d[1], 1).repeat(d[2], 2)
+ g011 = gradients[0:-1, 1:, 1:].repeat(d[0], 0).repeat(d[1], 1).repeat(d[2], 2)
+ g111 = gradients[1:, 1:, 1:].repeat(d[0], 0).repeat(d[1], 1).repeat(d[2], 2)
+ # Ramps
+ n000 = np.sum(np.stack((grid[:, :, :, 0], grid[:, :, :, 1], grid[:, :, :, 2]), axis=3) * g000, 3)
+ n100 = np.sum(np.stack((grid[:, :, :, 0]-1, grid[:, :, :, 1], grid[:, :, :, 2]), axis=3) * g100, 3)
+ n010 = np.sum(np.stack((grid[:, :, :, 0], grid[:, :, :, 1]-1, grid[:, :, :, 2]), axis=3) * g010, 3)
+ n110 = np.sum(np.stack((grid[:, :, :, 0]-1, grid[:, :, :, 1]-1, grid[:, :, :, 2]), axis=3) * g110, 3)
+ n001 = np.sum(np.stack((grid[:, :, :, 0], grid[:, :, :, 1], grid[:, :, :, 2]-1), axis=3) * g001, 3)
+ n101 = np.sum(np.stack((grid[:, :, :, 0]-1, grid[:, :, :, 1], grid[:, :, :, 2]-1), axis=3) * g101, 3)
+ n011 = np.sum(np.stack((grid[:, :, :, 0], grid[:, :, :, 1]-1, grid[:, :, :, 2]-1), axis=3) * g011, 3)
+ n111 = np.sum(np.stack((grid[:, :, :, 0]-1, grid[:, :, :, 1]-1, grid[:, :, :, 2]-1), axis=3) * g111, 3)
+ # Interpolation
+ t = f(grid)
+ n00 = n000*(1-t[:, :, :, 0]) + t[:, :, :, 0]*n100
+ n10 = n010*(1-t[:, :, :, 0]) + t[:, :, :, 0]*n110
+ n01 = n001*(1-t[:, :, :, 0]) + t[:, :, :, 0]*n101
+ n11 = n011*(1-t[:, :, :, 0]) + t[:, :, :, 0]*n111
+ n0 = (1-t[:, :, :, 1])*n00 + t[:, :, :, 1]*n10
+ n1 = (1-t[:, :, :, 1])*n01 + t[:, :, :, 1]*n11
+ return (1-t[:, :, :, 2])*n0 + t[:, :, :, 2]*n1
+
+
+def generate_fractal_noise_3d(shape, res, octaves=1, persistence=0.5):
+ RandomState(MT19937(SeedSequence(int(round(time() * 1000)))))
+ noise = np.zeros(shape)
+ frequency = 1
+ amplitude = 1
+ for _ in range(octaves):
+ noise += amplitude * generate_perlin_noise_3d(shape, (frequency*res[0], frequency*res[1], frequency*res[2]))
+ frequency *= 2
+ amplitude *= persistence
+ return noise
+
+
+def generate_fractal_noise_temporal3d(shape, tsteps, res, octaves=1, persistence=0.5, max_shift=(1, 1, 1)):
+ RandomState(MT19937(SeedSequence(int(round(time() * 1000)))))
+ noise = np.zeros(shape)
+ frequency = 1
+ amplitude = 1
+ for _ in range(octaves):
+ noise += amplitude * generate_perlin_noise_3d(shape, (frequency*res[0], frequency*res[1], frequency*res[2]))
+ frequency *= 2
+ amplitude *= persistence
+ ishape = (tsteps, )+shape
+ result = np.zeros(ishape)
+ timage = np.zeros(noise.shape)
+ for i in range(0, tsteps):
+ result[i, :, :] = noise
+ sx = np.random.randint(-max_shift[0], max_shift[0], dtype=np.int32)
+ sy = np.random.randint(-max_shift[1], max_shift[1], dtype=np.int32)
+ sz = np.random.randint(-max_shift[2], max_shift[2], dtype=np.int32)
+ ndimage.shift(noise, (sx, sy, sz), timage, order=3, mode='mirror')
+ noise = timage
+ return result
+
+
+if __name__ == '__main__':
+ import matplotlib.pyplot as plt
+ import matplotlib.animation as animation
+
+ np.random.seed(0)
+ noise = generate_fractal_noise_3d((32, 256, 256), (1, 4, 4), 4)
+
+ fig = plt.figure()
+ images = [[plt.imshow(layer, cmap='gray', interpolation='lanczos', animated=True)] for layer in noise]
+    anim = animation.ArtistAnimation(fig, images, interval=50, blit=True)  # avoid shadowing the animation module and keep a reference so it is not garbage-collected
+ plt.show()
diff --git a/performance/benchmark_CMEMS.py b/performance/benchmark_CMEMS.py
new file mode 100644
index 0000000000..c16fe38ac0
--- /dev/null
+++ b/performance/benchmark_CMEMS.py
@@ -0,0 +1,376 @@
+"""
+Author: Dr. Christian Kehl
+Date: 11-02-2020
+"""
+
+from parcels import AdvectionEE, AdvectionRK45, AdvectionRK4_3D
+from parcels import FieldSet, ScipyParticle, JITParticle, Variable, AdvectionRK4, StateCode, OperationCode, ErrorCode
+from parcels.particleset_benchmark import ParticleSet_Benchmark as ParticleSet
+from parcels.field import VectorField, NestedField, SummedField
+# from parcels import plotTrajectoriesFile_loadedField
+# from parcels import rng as random
+from parcels import ParcelsRandom
+from datetime import timedelta as delta
+from datetime import datetime
+from argparse import ArgumentParser
+import numpy as np
+import fnmatch
+# import dask as da
+# import dask.array as daArray
+from glob import glob
+import time as ostime
+import math
+import matplotlib.pyplot as plt
+import sys
+import os
+import psutil
+import gc
+try:
+ from mpi4py import MPI
+except:
+ MPI = None
+
+with_GC = False
+
+import warnings
+import xarray as xr
+warnings.simplefilter("ignore", category=xr.SerializationWarning)
+
+ptype = {'scipy': ScipyParticle, 'jit': JITParticle}
+method = {'RK4': AdvectionRK4, 'EE': AdvectionEE, 'RK45': AdvectionRK45}
+global_t_0 = 0
+odir = ""
+
+
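+# Build a FieldSet from the CMEMS GLORYS12 reanalysis files of July 2016. chunksize='auto'
+# lets dask/parcels pick the chunk sizes; with periodic_wrap the roughly month-long record is
+# cycled via time_periodic, otherwise the fields are extrapolated in time beyond the last
+# available snapshot.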
+def create_CMEMS_fieldset(datahead, periodic_wrap):
+ ddir = os.path.join(datahead, "CMEMS/GLOBAL_REANALYSIS_PHY_001_030/")
+ files = sorted(glob(ddir+"mercatorglorys12v1_gl12_mean_201607*.nc"))
+ variables = {'U': 'uo', 'V': 'vo'}
+ dimensions = {'lon': 'longitude', 'lat': 'latitude', 'time': 'time'}
+ # chs = False
+ chs = 'auto'
+ if periodic_wrap:
+ return FieldSet.from_netcdf(files, variables, dimensions, chunksize=chs, time_periodic=delta(days=31))
+ else:
+ return FieldSet.from_netcdf(files, variables, dimensions, chunksize=chs, allow_time_extrapolation=True)
+
+
+class AgeParticle_JIT(JITParticle):
+ age = Variable('age', dtype=np.float64, initial=0.0)
+ life_expectancy = Variable('life_expectancy', dtype=np.float64, initial=np.finfo(np.float64).max)
+ initialized_dynamic = Variable('initialized_dynamic', dtype=np.int32, initial=0)
+
+class AgeParticle_SciPy(ScipyParticle):
+ age = Variable('age', dtype=np.float64, initial=0.0)
+ life_expectancy = Variable('life_expectancy', dtype=np.float64, initial=np.finfo(np.float64).max)
+ initialized_dynamic = Variable('initialized_dynamic', dtype=np.int32, initial=0)
+
+age_ptype = {'scipy': AgeParticle_SciPy, 'jit': AgeParticle_JIT}
+
+def periodicBC(particle, fieldSet, time):
+ if particle.lon > 180.0:
+ particle.lon -= 360.0
+ if particle.lon < -180.0:
+ particle.lon += 360.0
+ particle.lat = min(particle.lat, 90.0)
+ particle.lat = max(particle.lat, -80.0)
+ # if particle.lat > 90.0:
+ # particle.lat -= 170.0
+ # if particle.lat < -80.0:
+ # particle.lat += 170.0
+
+def initialize(particle, fieldset, time):
+ if particle.initialized_dynamic < 1:
+ particle.life_expectancy = time + ParcelsRandom.uniform(.0, fieldset.life_expectancy) * math.sqrt(3.0 / 2.0)
+ # particle.life_expectancy = time+ParcelsRandom.uniform(.0, fieldset.life_expectancy) * ((3.0/2.0)**2.0)
+ particle.initialized_dynamic = 1
+
+def Age(particle, fieldset, time):
+ if particle.state == StateCode.Evaluate:
+ particle.age = particle.age + math.fabs(particle.dt)
+ if particle.age > particle.life_expectancy:
+ particle.delete()
+
+def DeleteParticle(particle, fieldset, time):
+ particle.delete()
+
+def RenewParticle(particle, fieldset, time):
+    # re-seed the particle at a random position inside the (lon, lat) domain
+    particle.lon = np.random.rand() * 360.0 - 180.0
+    particle.lat = np.random.rand() * 170.0 - 80.0
+
+def perIterGC():
+ gc.collect()
+
+if __name__=='__main__':
+    parser = ArgumentParser(description="Benchmark of particle advection in CMEMS global reanalysis (GLORYS12) fields")
+ parser.add_argument("-i", "--imageFileName", dest="imageFileName", type=str, default="mpiChunking_plot_MPI.png", help="image file name of the plot")
+ parser.add_argument("-b", "--backwards", dest="backwards", action='store_true', default=False, help="enable/disable running the simulation backwards")
+ parser.add_argument("-p", "--periodic", dest="periodic", action='store_true', default=False, help="enable/disable periodic wrapping (else: extrapolation)")
+ parser.add_argument("-r", "--release", dest="release", action='store_true', default=False, help="continuously add particles via repeatdt (default: False)")
+ parser.add_argument("-rt", "--releasetime", dest="repeatdt", type=int, default=720, help="repeating release rate of added particles in Minutes (default: 720min = 12h)")
+ parser.add_argument("-a", "--aging", dest="aging", action='store_true', default=False, help="Removed aging particles dynamically (default: False)")
+ parser.add_argument("-t", "--time_in_days", dest="time_in_days", type=int, default=1, help="runtime in days (default: 1)")
+ parser.add_argument("-x", "--xarray", dest="use_xarray", action='store_true', default=False, help="use xarray as data backend")
+ parser.add_argument("-w", "--writeout", dest="write_out", action='store_true', default=False, help="write data in outfile")
+ parser.add_argument("-d", "--delParticle", dest="delete_particle", action='store_true', default=False, help="switch to delete a particle (True) or reset a particle (default: False).")
+ parser.add_argument("-A", "--animate", dest="animate", action='store_true', default=False, help="animate the particle trajectories during the run or not (default: False).")
+ parser.add_argument("-V", "--visualize", dest="visualize", action='store_true', default=False, help="Visualize particle trajectories at the end (default: False). Requires -w in addition to take effect.")
+ parser.add_argument("-N", "--n_particles", dest="nparticles", type=str, default="2**6", help="number of particles to generate and advect (default: 2e6)")
+ parser.add_argument("-sN", "--start_n_particles", dest="start_nparticles", type=str, default="96", help="(optional) number of particles generated per release cycle (if --rt is set) (default: 96)")
+ parser.add_argument("-m", "--mode", dest="compute_mode", choices=['jit','scipy'], default="jit", help="computation mode = [JIT, SciPp]")
+ parser.add_argument("-G", "--GC", dest="useGC", action='store_true', default=False, help="using a garbage collector (default: false)")
+ args = parser.parse_args()
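+    # Example invocation (hypothetical image file name; all flags are defined above):
+    #     python benchmark_CMEMS.py -t 30 -N 2**16 -G -w -i cmems_bench.png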
+
+ imageFileName=args.imageFileName
+ periodicFlag=args.periodic
+ backwardSimulation = args.backwards
+ repeatdtFlag=args.release
+ repeatRateMinutes=args.repeatdt
+ time_in_days = args.time_in_days
+ use_xarray = args.use_xarray
+ agingParticles = args.aging
+ with_GC = args.useGC
+
+ Nparticle = int(float(eval(args.nparticles)))
+ target_N = Nparticle
+ start_N_particles = int(float(eval(args.start_nparticles)))
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ if mpi_comm.Get_rank() == 0:
+ if agingParticles and not repeatdtFlag:
+ sys.stdout.write("N: {} ( {} )\n".format(Nparticle, int(Nparticle * (3.0 / 2.0)**2.0)))
+ else:
+ sys.stdout.write("N: {}\n".format(Nparticle))
+ else:
+ if agingParticles and not repeatdtFlag:
+ sys.stdout.write("N: {} ( {} )\n".format(Nparticle, int(Nparticle * (3.0 / 2.0)**2.0)))
+ else:
+ sys.stdout.write("N: {}\n".format(Nparticle))
+
+ dt_minutes = 60
+ nowtime = datetime.now()
+ ParcelsRandom.seed(nowtime.microsecond)
+
+ branch = "soa_benchmark"
+ computer_env = "local/unspecified"
+ scenario = "CMEMS"
+ headdir = ""
+ odir = ""
+ dirread_pal = ""
+ datahead = ""
+ dirread_top = ""
+ dirread_top_bgc = ""
+ if os.uname()[1] in ['science-bs35', 'science-bs36']: # Gemini
+ # headdir = "/scratch/{}/experiments/palaeo-parcels".format(os.environ['USER'])
+ headdir = "/scratch/{}/experiments".format("ckehl")
+ odir = headdir
+ datahead = "/data/oceanparcels/input_data"
+ dirread_top = os.path.join(datahead, 'CMEMS/GLOBAL_REANALYSIS_PHY_001_030/')
+ computer_env = "Gemini"
+ elif fnmatch.fnmatchcase(os.uname()[1], "*.bullx*"): # Cartesius
+ CARTESIUS_SCRATCH_USERNAME = 'ckehluu'
+ headdir = "/scratch/shared/{}/experiments".format(CARTESIUS_SCRATCH_USERNAME)
+ odir = headdir
+ datahead = "/projects/0/topios/hydrodynamic_data"
+ dirread_top = os.path.join(datahead, 'CMEMS/GLOBAL_REANALYSIS_PHY_001_030/')
+ computer_env = "Cartesius"
+ else:
+ headdir = "/var/scratch/experiments"
+ odir = headdir
+ dirread_pal = headdir
+ datahead = "/data"
+ dirread_top = os.path.join(datahead, 'CMEMS/GLOBAL_REANALYSIS_PHY_001_030/')
+ print("running {} on {} (uname: {}) - branch '{}' - (target) N: {} - argv: {}".format(scenario, computer_env, os.uname()[1], branch, target_N, sys.argv[1:]))
+
+ if os.path.sep in imageFileName:
+ head_dir = os.path.dirname(imageFileName)
+ if head_dir[0] == os.path.sep:
+ odir = head_dir
+ else:
+ odir = os.path.join(odir, head_dir)
+ imageFileName = os.path.split(imageFileName)[1]
+
+ func_time = []
+ mem_used_GB = []
+
+ np.random.seed(nowtime.microsecond)
+ fieldset = create_CMEMS_fieldset(datahead, periodic_wrap=periodicFlag)
+
+    if args.compute_mode == 'scipy':
+ Nparticle = 2**10
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ #global_t_0 = MPI.Wtime()
+ global_t_0 = ostime.process_time()
+ else:
+ #global_t_0 = ostime.time()
+ global_t_0 = ostime.process_time()
+
+ simStart = None
+ for f in fieldset.get_fields():
+ if type(f) in [VectorField, NestedField, SummedField]: # or not f.grid.defer_load
+ continue
+ else:
+ if backwardSimulation:
+ simStart=f.grid.time_full[-1]
+ else:
+ simStart = f.grid.time_full[0]
+ break
+
+ addParticleN = 1
+ # np_scaler = math.sqrt(3.0/2.0)
+ np_scaler = (3.0 / 2.0)**2.0 # **
+ # np_scaler = 3.0 / 2.0
+ # cycle_scaler = math.sqrt(3.0/2.0)
+ cycle_scaler = (3.0 / 2.0)**2.0 # **
+ # cycle_scaler = 3.0 / 2.0
+ if agingParticles:
+ if not repeatdtFlag:
+ Nparticle = int(Nparticle * np_scaler)
+ fieldset.add_constant('life_expectancy', delta(days=time_in_days).total_seconds())
+ if repeatdtFlag:
+ addParticleN = Nparticle/2.0
+ refresh_cycle = (delta(days=time_in_days).total_seconds() / (addParticleN/start_N_particles)) / cycle_scaler
+ if agingParticles:
+ refresh_cycle /= cycle_scaler
+ repeatRateMinutes = int(refresh_cycle/60.0) if repeatRateMinutes == 720 else repeatRateMinutes
+
+ if backwardSimulation:
+ # ==== backward simulation ==== #
+ if agingParticles:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * 360.0 -180.0, lat=np.random.rand(start_N_particles, 1) * 160.0 - 80.0, time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(int(Nparticle/2.0), 1) * 360.0 -180.0, lat=np.random.rand(int(Nparticle/2.0), 1) * 160.0 - 80.0, time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * 360.0 -180.0, lat=np.random.rand(Nparticle, 1) * 160.0 - 80.0, time=simStart)
+ else:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * 360.0 -180.0, lat=np.random.rand(start_N_particles, 1) * 160.0 - 80.0, time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(int(Nparticle/2.0), 1) * 360.0 -180.0, lat=np.random.rand(int(Nparticle/2.0), 1) * 160.0 - 80.0, time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * 360.0 -180.0, lat=np.random.rand(Nparticle, 1) * 160.0 - 80.0, time=simStart)
+ else:
+ # ==== forward simulation ==== #
+ if agingParticles:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * 360.0 -180.0, lat=np.random.rand(start_N_particles, 1) * 160.0 - 80.0, time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(int(Nparticle/2.0), 1) * 360.0 -180.0, lat=np.random.rand(int(Nparticle/2.0), 1) * 160.0 - 80.0, time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * 360.0 -180.0, lat=np.random.rand(Nparticle, 1) * 160.0 - 80.0, time=simStart)
+ else:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * 360.0 -180.0, lat=np.random.rand(start_N_particles, 1) * 160.0 - 80.0, time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(int(Nparticle/2.0), 1) * 360.0 -180.0, lat=np.random.rand(int(Nparticle/2.0), 1) * 160.0 - 80.0, time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * 360.0 -180.0, lat=np.random.rand(Nparticle, 1) * 160.0 - 80.0, time=simStart)
+
+
+ output_file = None
+ out_fname = "benchmark_CMEMS"
+ if args.write_out:
+ if MPI and (MPI.COMM_WORLD.Get_size()>1):
+ out_fname += "_MPI"
+ else:
+ out_fname += "_noMPI"
+ if periodicFlag:
+ out_fname += "_p"
+ out_fname += "_n"+str(Nparticle)
+ if backwardSimulation:
+ out_fname += "_bwd"
+ else:
+ out_fname += "_fwd"
+ if repeatdtFlag:
+ out_fname += "_add"
+ if agingParticles:
+ out_fname += "_age"
+ output_file = pset.ParticleFile(name=os.path.join(odir,out_fname+".nc"), outputdt=delta(hours=24))
+ delete_func = RenewParticle
+ if args.delete_particle:
+ delete_func=DeleteParticle
+ postProcessFuncs = []
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ #starttime = MPI.Wtime()
+ starttime = ostime.process_time()
+ else:
+ #starttime = ostime.time()
+ starttime = ostime.process_time()
+ kernels = pset.Kernel(AdvectionRK4,delete_cfiles=True)
+ kernels += pset.Kernel(periodicBC, delete_cfiles=True)
+ if agingParticles:
+ kernels += pset.Kernel(initialize, delete_cfiles=True)
+ kernels += pset.Kernel(Age, delete_cfiles=True)
+ if with_GC:
+ postProcessFuncs.append(perIterGC)
+ if backwardSimulation:
+ # ==== backward simulation ==== #
+ if args.animate:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=-dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12), moviedt=delta(hours=6), movie_background_field=fieldset.U)
+ else:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=-dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12))
+ else:
+ # ==== forward simulation ==== #
+ if args.animate:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12), moviedt=delta(hours=6), movie_background_field=fieldset.U)
+ else:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12))
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ #endtime = MPI.Wtime()
+ endtime = ostime.process_time()
+ else:
+ #endtime = ostime.time()
+ endtime = ostime.process_time()
+
+ if args.write_out:
+ output_file.close()
+
+ size_Npart = len(pset.nparticle_log)
+ Npart = pset.nparticle_log.get_param(size_Npart-1)
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ Npart = mpi_comm.reduce(Npart, op=MPI.SUM, root=0)
+ if mpi_comm.Get_rank() == 0:
+ if size_Npart>0:
+ sys.stdout.write("final # particles: {}\n".format( Npart ))
+ sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime-starttime))
+ avg_time = np.mean(np.array(pset.total_log.get_values(), dtype=np.float64))
+ sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time*1000.0))
+ else:
+ if size_Npart > 0:
+ sys.stdout.write("final # particles: {}\n".format( Npart ))
+ sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime - starttime))
+ avg_time = np.mean(np.array(pset.total_log.get_values(), dtype=np.float64))
+ sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time * 1000.0))
+
+ # if args.write_out:
+ # output_file.close()
+ # if args.visualize:
+ # if MPI:
+ # mpi_comm = MPI.COMM_WORLD
+ # if mpi_comm.Get_rank() == 0:
+ # plotTrajectoriesFile_loadedField(os.path.join(odir, out_fname+".nc"), tracerfield=fieldset.U)
+ # else:
+ # plotTrajectoriesFile_loadedField(os.path.join(odir, out_fname+".nc"),tracerfield=fieldset.U)
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ # mpi_comm.Barrier()
+ Nparticles = mpi_comm.reduce(np.array(pset.nparticle_log.get_params()), op=MPI.SUM, root=0)
+ Nmem = mpi_comm.reduce(np.array(pset.mem_log.get_params()), op=MPI.SUM, root=0)
+ if mpi_comm.Get_rank() == 0:
+ pset.plot_and_log(memory_used=Nmem, nparticles=Nparticles, target_N=target_N, imageFilePath=imageFileName, odir=odir, xlim_range=[0, 730], ylim_range=[0, 150])
+ else:
+ pset.plot_and_log(target_N=target_N, imageFilePath=imageFileName, odir=odir, xlim_range=[0, 730], ylim_range=[0, 150])
diff --git a/performance/benchmark_bickleyjet.py b/performance/benchmark_bickleyjet.py
new file mode 100644
index 0000000000..e5d02435c8
--- /dev/null
+++ b/performance/benchmark_bickleyjet.py
@@ -0,0 +1,434 @@
+"""
+Author: Dr. Christian Kehl
+Date: 11-02-2020
+"""
+
+from parcels import AdvectionEE, AdvectionRK45, AdvectionRK4
+from parcels import FieldSet, ScipyParticle, JITParticle, Variable, AdvectionRK4, StateCode, OperationCode, ErrorCode
+from parcels.particleset_benchmark import ParticleSet_Benchmark as BenchmarkParticleSet
+from parcels.particleset import ParticleSet as DryParticleSet
+from parcels.field import Field, VectorField, NestedField, SummedField
+# from parcels import plotTrajectoriesFile_loadedField
+# from parcels import rng as random
+from parcels import ParcelsRandom
+from datetime import timedelta as delta
+import math
+from argparse import ArgumentParser
+import datetime
+import numpy as np
+import xarray as xr
+import fnmatch
+import sys
+import gc
+import os
+import time as ostime
+# import matplotlib.pyplot as plt
+from parcels.tools import perlin3d
+from parcels.tools import perlin2d
+
+try:
+ from mpi4py import MPI
+except:
+ MPI = None
+with_GC = False
+
+pset = None
+ptype = {'scipy': ScipyParticle, 'jit': JITParticle}
+method = {'RK4': AdvectionRK4, 'EE': AdvectionEE, 'RK45': AdvectionRK45}
+global_t_0 = 0
+Nparticle = int(math.pow(2,10)) # equals Nparticle = 1024
+#Nparticle = int(math.pow(2,11)) # equals Nparticle = 2048
+#Nparticle = int(math.pow(2,12)) # equals Nparticle = 4096
+#Nparticle = int(math.pow(2,13)) # equals Nparticle = 8192
+#Nparticle = int(math.pow(2,14)) # equals Nparticle = 16384
+#Nparticle = int(math.pow(2,15)) # equals Nparticle = 32768
+#Nparticle = int(math.pow(2,16)) # equals Nparticle = 65536
+#Nparticle = int(math.pow(2,17)) # equals Nparticle = 131072
+#Nparticle = int(math.pow(2,18)) # equals Nparticle = 262144
+#Nparticle = int(math.pow(2,19)) # equals Nparticle = 524288
+
+#a = 1000 * 1e3
+#b = 1000 * 1e3
+#scalefac = 2.0
+#tsteps = 61
+#tscale = 6
+
+a = 10 * 1e3 # [a in km -> 10e3] # is going to be overwritten
+b = 10 * 1e3 # [b in km -> 10e3] # is going to be overwritten
+tsteps = 61 # in steps
+tstepsize = 12.0 # unitary
+tscale = 12.0*60.0*60.0 # in seconds
+# time[days] = (tsteps * tstepsize) * tscale
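+# here: (61 * 12.0) * 43200 s = 31,622,400 s, i.e. 366 days of field data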
+# jet_rotation_speed = 14.0*24.0*60.0*60.0 # assume 1 rotation every 2 weeks
+sx = 1718.9
+sy = 3200.0
+# scalefac = 2.0 * 6.0 # scaling the advection speeds to 12 m/s
+scalefac = (40.0 / (1000.0/60.0)) # 40 km/h
+scalefac /= 1000.0
+
+# we need to modify the kernel.execute / pset.execute so that it returns from the JIT
+# in a given time WITHOUT writing to disk via outfile => introduce a pyloop_dt
+
+def DeleteParticle(particle, fieldset, time):
+ particle.delete()
+
+def RenewParticle(particle, fieldset, time):
+    # re-seed the particle inside the domain: lon in [0, a], lat in [-b/2, b/2]
+    particle.lon = np.random.rand() * a
+    particle.lat = np.random.rand() * (-b) + (b/2.0)
+
+def perIterGC():
+ gc.collect()
+
+def bickleyjet_from_numpy(xdim=540, ydim=320, periodic_wrap=False, write_out=False):
+ """Bickley Jet Field as implemented in Hadjighasem et al 2017, 10.1063/1.4982720"""
+ U0 = 0.06266
+ r0 = sx
+ L = r0 / 3.599435028 # 1770.
+ k1 = 2 * 1 / r0
+ k2 = 2 * 2 / r0
+ k3 = 2 * 3 / r0
+ eps1 = 0.075
+ eps2 = 0.4
+ eps3 = 0.3
+ c3 = 0.461 * U0
+ c2 = 0.205 * U0
+ c1 = c3 + ((np.sqrt(5)-1)/2.) * (k2/k1) * (c2 - c3)
+
+ global a, b
+ a, b = np.pi*r0, sy # domain size
+ lon = np.linspace(0, a, xdim, dtype=np.float32)
+ lonrange = lon.max()-lon.min()
+ sys.stdout.write("lon field: {}\n".format(lon.size))
+ lat = np.linspace(-b*0.5, b*0.5, ydim, dtype=np.float32)
+ latrange = lat.max() - lat.min()
+ sys.stdout.write("lat field: {}\n".format(lat.size))
+ totime = (tsteps * tstepsize) * tscale
+ times = np.linspace(0., totime, tsteps, dtype=np.float64)
+ sys.stdout.write("time field: {}\n".format(times.size))
+ dx, dy = lon[2]-lon[1], lat[2]-lat[1]
+
+ U = np.zeros((times.size, lat.size, lon.size), dtype=np.float32)
+ V = np.zeros((times.size, lat.size, lon.size), dtype=np.float32)
+
+ for t in range(len(times)):
+ time_f = times[t]
+ f1 = eps1 * np.exp(-1j * k1 * c1 * time_f)
+ f2 = eps2 * np.exp(-1j * k2 * c2 * time_f)
+ f3 = eps3 * np.exp(-1j * k3 * c3 * time_f)
+ for j in range(lat.size):
+ for i in range(lon.size):
+ x1 = lon[i]-dx/2.0
+ x2 = lat[j]-dy/2.0
+ F1 = f1 * np.exp(1j * k1 * x1)
+ F2 = f2 * np.exp(1j * k2 * x1)
+ F3 = f3 * np.exp(1j * k3 * x1)
+ G = np.real(np.sum([F1, F2, F3]))
+ G_x = np.real(np.sum([1j * k1 * F1, 1j * k2 * F2, 1j * k3 * F3]))
+ U[t, j, i] = U0 / (np.cosh(x2/L)**2) + 2 * U0 * np.sinh(x2/L) / (np.cosh(x2/L)**3) * G
+ V[t, j, i] = U0 * L * (1./np.cosh(x2/L))**2 * G_x
+
+ U *= scalefac
+ # U = np.transpose(U, (0, 2, 1))
+ V *= scalefac
+ # V = np.transpose(V, (0, 2, 1))
+
+
+ data = {'U': U, 'V': V}
+ dimensions = {'time': times, 'lon': lon, 'lat': lat}
+ fieldset = None
+ if periodic_wrap:
+ fieldset = FieldSet.from_data(data, dimensions, mesh='flat', transpose=False, time_periodic=delta(days=1))
+ else:
+ fieldset = FieldSet.from_data(data, dimensions, mesh='flat', transpose=False, allow_time_extrapolation=True)
+ if write_out:
+ fieldset.write(filename=write_out)
+ return fieldset
+
+
+class AgeParticle_JIT(JITParticle):
+ age = Variable('age', dtype=np.float64, initial=0.0)
+ life_expectancy = Variable('life_expectancy', dtype=np.float64, initial=np.finfo(np.float64).max)
+ initialized_dynamic = Variable('initialized_dynamic', dtype=np.int32, initial=0)
+
+class AgeParticle_SciPy(ScipyParticle):
+ age = Variable('age', dtype=np.float64, initial=0.0)
+ life_expectancy = Variable('life_expectancy', dtype=np.float64, initial=np.finfo(np.float64).max)
+ initialized_dynamic = Variable('initialized_dynamic', dtype=np.int32, initial=0)
+
+def initialize(particle, fieldset, time):
+ if particle.initialized_dynamic < 1:
+ np_scaler = math.sqrt(3.0 / 2.0)
+ particle.life_expectancy = time + ParcelsRandom.uniform(.0, (fieldset.life_expectancy-time) * 2.0 / np_scaler)
+ # particle.life_expectancy = time + ParcelsRandom.uniform(.0, (fieldset.life_expectancy-time)*math.sqrt(3.0 / 2.0))
+ # particle.life_expectancy = time + ParcelsRandom.uniform(.0, fieldset.life_expectancy) * math.sqrt(3.0 / 2.0)
+ # particle.life_expectancy = time+ParcelsRandom.uniform(.0, fieldset.life_expectancy) * ((3.0/2.0)**2.0)
+ particle.initialized_dynamic = 1
+
+def Age(particle, fieldset, time):
+ if particle.state == StateCode.Evaluate:
+ particle.age = particle.age + math.fabs(particle.dt)
+ if particle.age > particle.life_expectancy:
+ particle.delete()
+
+age_ptype = {'scipy': AgeParticle_SciPy, 'jit': AgeParticle_JIT}
+
+if __name__=='__main__':
+    parser = ArgumentParser(description="Benchmark of particle advection in a time-dependent Bickley jet test case")
+ parser.add_argument("-i", "--imageFileName", dest="imageFileName", type=str, default="mpiChunking_plot_MPI.png", help="image file name of the plot")
+ parser.add_argument("-b", "--backwards", dest="backwards", action='store_true', default=False, help="enable/disable running the simulation backwards")
+ parser.add_argument("-p", "--periodic", dest="periodic", action='store_true', default=False, help="enable/disable periodic wrapping (else: extrapolation)")
+ parser.add_argument("-r", "--release", dest="release", action='store_true', default=False, help="continuously add particles via repeatdt (default: False)")
+ parser.add_argument("-rt", "--releasetime", dest="repeatdt", type=int, default=720, help="repeating release rate of added particles in Minutes (default: 720min = 12h)")
+ parser.add_argument("-a", "--aging", dest="aging", action='store_true', default=False, help="Removed aging particles dynamically (default: False)")
+ parser.add_argument("-t", "--time_in_days", dest="time_in_days", type=int, default=1, help="runtime in days (default: 1)")
+ parser.add_argument("-x", "--xarray", dest="use_xarray", action='store_true', default=False, help="use xarray as data backend")
+ parser.add_argument("-w", "--writeout", dest="write_out", action='store_true', default=False, help="write data in outfile")
+ parser.add_argument("-d", "--delParticle", dest="delete_particle", action='store_true', default=False, help="switch to delete a particle (True) or reset a particle (default: False).")
+ parser.add_argument("-A", "--animate", dest="animate", action='store_true', default=False, help="animate the particle trajectories during the run or not (default: False).")
+ parser.add_argument("-V", "--visualize", dest="visualize", action='store_true', default=False, help="Visualize particle trajectories at the end (default: False). Requires -w in addition to take effect.")
+ parser.add_argument("-N", "--n_particles", dest="nparticles", type=str, default="2**6", help="number of particles to generate and advect (default: 2e6)")
+ parser.add_argument("-sN", "--start_n_particles", dest="start_nparticles", type=str, default="96", help="(optional) number of particles generated per release cycle (if --rt is set) (default: 96)")
+ parser.add_argument("-m", "--mode", dest="compute_mode", choices=['jit','scipy'], default="jit", help="computation mode = [JIT, SciPp]")
+ parser.add_argument("-G", "--GC", dest="useGC", action='store_true', default=False, help="using a garbage collector (default: false)")
+ parser.add_argument("--dry", dest="dryrun", action="store_true", default=False, help="Start dry run (no benchmarking and its classes")
+ args = parser.parse_args()
+
+ ParticleSet = BenchmarkParticleSet
+ if args.dryrun:
+ ParticleSet = DryParticleSet
+
+ imageFileName=args.imageFileName
+ periodicFlag=args.periodic
+ backwardSimulation = args.backwards
+ repeatdtFlag=args.release
+ repeatRateMinutes=args.repeatdt
+ time_in_days = args.time_in_days
+ use_xarray = args.use_xarray
+ agingParticles = args.aging
+ with_GC = args.useGC
+ Nparticle = int(float(eval(args.nparticles)))
+ target_N = Nparticle
+ addParticleN = 1
+ # np_scaler = math.sqrt(3.0/2.0)
+ # np_scaler = (3.0 / 2.0)**2.0 # **
+ np_scaler = 3.0 / 2.0
+ # cycle_scaler = math.sqrt(3.0/2.0)
+ # cycle_scaler = (3.0 / 2.0)**2.0 # **
+ # cycle_scaler = 3.0 / 2.0
+ cycle_scaler = 7.0 / 4.0
+ start_N_particles = int(float(eval(args.start_nparticles)))
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ if mpi_comm.Get_rank() == 0:
+ if agingParticles and not repeatdtFlag:
+ sys.stdout.write("N: {} ( {} )\n".format(Nparticle, int(Nparticle * np_scaler)))
+ else:
+ sys.stdout.write("N: {}\n".format(Nparticle))
+ else:
+ if agingParticles and not repeatdtFlag:
+ sys.stdout.write("N: {} ( {} )\n".format(Nparticle, int(Nparticle * np_scaler)))
+ else:
+ sys.stdout.write("N: {}\n".format(Nparticle))
+
+ dt_minutes = 60
+ #dt_minutes = 20
+ #random.seed(123456)
+ nowtime = datetime.datetime.now()
+ ParcelsRandom.seed(nowtime.microsecond)
+
+ branch = "soa_benchmark"
+ computer_env = "local/unspecified"
+ scenario = "bickleyjet"
+ odir = ""
+ if os.uname()[1] in ['science-bs35', 'science-bs36']: # Gemini
+ # odir = "/scratch/{}/experiments".format(os.environ['USER'])
+ odir = "/scratch/{}/experiments".format("ckehl")
+ computer_env = "Gemini"
+ elif fnmatch.fnmatchcase(os.uname()[1], "*.bullx*"): # Cartesius
+ CARTESIUS_SCRATCH_USERNAME = 'ckehluu'
+ odir = "/scratch/shared/{}/experiments".format(CARTESIUS_SCRATCH_USERNAME)
+ computer_env = "Cartesius"
+ else:
+ odir = "/var/scratch/experiments"
+ print("running {} on {} (uname: {}) - branch '{}' - (target) N: {} - argv: {}".format(scenario, computer_env, os.uname()[1], branch, target_N, sys.argv[1:]))
+
+ if os.path.sep in imageFileName:
+ head_dir = os.path.dirname(imageFileName)
+ if head_dir[0] == os.path.sep:
+ odir = head_dir
+ else:
+ odir = os.path.join(odir, head_dir)
+ imageFileName = os.path.split(imageFileName)[1]
+
+ func_time = []
+ mem_used_GB = []
+
+ np.random.seed(0)
+ fieldset = None
+ if use_xarray:
+ fieldset = bickleyjet_from_numpy(periodic_wrap=periodicFlag)
+ else:
+ field_fpath = False
+ if args.write_out:
+ field_fpath = os.path.join(odir,"bickleyjet")
+ fieldset = bickleyjet_from_numpy(periodic_wrap=periodicFlag, write_out=field_fpath)
+
+    if args.compute_mode == 'scipy':
+ Nparticle = 2**10
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ global_t_0 = ostime.process_time()
+ else:
+ global_t_0 = ostime.process_time()
+
+ simStart = None
+ for f in fieldset.get_fields():
+ if type(f) in [VectorField, NestedField, SummedField]: # or not f.grid.defer_load
+ continue
+ else:
+ if backwardSimulation:
+ simStart=f.grid.time_full[-1]
+ else:
+ simStart = f.grid.time_full[0]
+ break
+
+ if agingParticles:
+ if not repeatdtFlag:
+ Nparticle = int(Nparticle * np_scaler)
+ fieldset.add_constant('life_expectancy', delta(days=time_in_days).total_seconds())
+ if repeatdtFlag:
+ addParticleN = Nparticle/2.0
+ refresh_cycle = (delta(days=time_in_days).total_seconds() / (addParticleN/start_N_particles)) / cycle_scaler
+ if agingParticles:
+ refresh_cycle /= cycle_scaler
+ repeatRateMinutes = int(refresh_cycle/60.0) if repeatRateMinutes == 720 else repeatRateMinutes
+
+ if backwardSimulation:
+ # ==== backward simulation ==== #
+ if agingParticles:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * a, lat=np.random.rand(start_N_particles, 1) * (-b) + (b/2.0), time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(int(addParticleN), 1) * a, lat=np.random.rand(int(addParticleN), 1) * (-b) + (b/2.0), time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * a, lat=np.random.rand(Nparticle, 1) * (-b) + (b/2.0), time=simStart)
+ else:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * a, lat=np.random.rand(start_N_particles, 1) * (-b) + (b/2.0), time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(int(addParticleN), 1) * a, lat=np.random.rand(int(addParticleN), 1) * (-b) + (b/2.0), time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * a, lat=np.random.rand(Nparticle, 1) * (-b) + (b/2.0), time=simStart)
+ else:
+ # ==== forward simulation ==== #
+ if agingParticles:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * a, lat=np.random.rand(start_N_particles, 1) * (-b) + (b/2.0), time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(int(addParticleN), 1) * a, lat=np.random.rand(int(addParticleN), 1) * (-b) + (b/2.0), time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * a, lat=np.random.rand(Nparticle, 1) * (-b) + (b/2.0), time=simStart)
+ else:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * a, lat=np.random.rand(start_N_particles, 1) * (-b) + (b/2.0), time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(int(addParticleN), 1) * a, lat=np.random.rand(int(addParticleN), 1) * (-b) + (b/2.0), time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * a, lat=np.random.rand(Nparticle, 1) * (-b) + (b/2.0), time=simStart)
+
+ output_file = None
+ out_fname = "benchmark_bickleyjet"
+ if args.write_out:
+ if MPI and (MPI.COMM_WORLD.Get_size()>1):
+ out_fname += "_MPI"
+ else:
+ out_fname += "_noMPI"
+ if periodicFlag:
+ out_fname += "_p"
+ out_fname += "_n"+str(Nparticle)
+ if backwardSimulation:
+ out_fname += "_bwd"
+ else:
+ out_fname += "_fwd"
+ if repeatdtFlag:
+ out_fname += "_add"
+ if agingParticles:
+ out_fname += "_age"
+ output_file = pset.ParticleFile(name=os.path.join(odir,out_fname+".nc"), outputdt=delta(hours=24))
+ delete_func = RenewParticle
+ if args.delete_particle:
+ delete_func=DeleteParticle
+ postProcessFuncs = []
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ starttime = ostime.process_time()
+ else:
+ starttime = ostime.process_time()
+ kernels = pset.Kernel(AdvectionRK4,delete_cfiles=True)
+ if agingParticles:
+ kernels += pset.Kernel(initialize, delete_cfiles=True)
+ kernels += pset.Kernel(Age, delete_cfiles=True)
+ if with_GC:
+ postProcessFuncs.append(perIterGC)
+ if backwardSimulation:
+ # ==== backward simulation ==== #
+ if args.animate:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=-dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12), moviedt=delta(hours=6), movie_background_field=fieldset.U)
+ else:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=-dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12))
+ else:
+ # ==== forward simulation ==== #
+ if args.animate:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12), moviedt=delta(hours=6), movie_background_field=fieldset.U)
+ else:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12))
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ endtime = ostime.process_time()
+ else:
+ endtime = ostime.process_time()
+
+ if args.write_out:
+ output_file.close()
+
+ if not args.dryrun:
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ # mpi_comm.Barrier()
+ size_Npart = len(pset.nparticle_log)
+ Npart = pset.nparticle_log.get_param(size_Npart-1)
+ Npart = mpi_comm.reduce(Npart, op=MPI.SUM, root=0)
+ if mpi_comm.Get_rank() == 0:
+ if size_Npart>0:
+ sys.stdout.write("final # particles: {}\n".format( Npart ))
+ sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime-starttime))
+ avg_time = np.mean(np.array(pset.total_log.get_values(), dtype=np.float64))
+ sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time*1000.0))
+ else:
+ size_Npart = len(pset.nparticle_log)
+ Npart = pset.nparticle_log.get_param(size_Npart-1)
+ if size_Npart > 0:
+ sys.stdout.write("final # particles: {}\n".format( Npart ))
+ sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime - starttime))
+ avg_time = np.mean(np.array(pset.total_log.get_values(), dtype=np.float64))
+ sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time * 1000.0))
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ Nparticles = mpi_comm.reduce(np.array(pset.nparticle_log.get_params()), op=MPI.SUM, root=0)
+ Nmem = mpi_comm.reduce(np.array(pset.mem_log.get_params()), op=MPI.SUM, root=0)
+ if mpi_comm.Get_rank() == 0:
+ pset.plot_and_log(memory_used=Nmem, nparticles=Nparticles, target_N=target_N, imageFilePath=imageFileName, odir=odir, xlim_range=[0, 730], ylim_range=[0, 150])
+ else:
+ pset.plot_and_log(target_N=target_N, imageFilePath=imageFileName, odir=odir, xlim_range=[0, 730], ylim_range=[0, 150])
+
+
diff --git a/performance/benchmark_deep_migration_NPacific.py b/performance/benchmark_deep_migration_NPacific.py
new file mode 100644
index 0000000000..b4dc2e2aaf
--- /dev/null
+++ b/performance/benchmark_deep_migration_NPacific.py
@@ -0,0 +1,476 @@
+from parcels import FieldSet, JITParticle, ScipyParticle, AdvectionRK4_3D, AdvectionRK4, ParticleFile, Variable, Field, NestedField, VectorField, StateCode, OperationCode, ErrorCode # , timer
+from parcels.particleset_benchmark import ParticleSet_Benchmark
+from parcels.kernels import TEOSseawaterdensity as seawaterdensity
+from argparse import ArgumentParser
+from datetime import timedelta as delta
+from datetime import datetime
+import time as ostime
+from glob import glob
+import os
+import sys
+import fnmatch
+import warnings
+import pickle
+# from numpy import *
+# import scipy.linalg
+import math
+import numpy as np
+import gc
+warnings.filterwarnings("ignore")
+
+try:
+ from mpi4py import MPI
+except:
+ MPI = None
+
+
+global_t_0 = 0
+# Fieldset grid is 30x30 deg in North Pacific
+minlat = 20
+maxlat = 50
+minlon = -175
+maxlon = -145
+
+# Release particles on a 10x10 deg grid in middle of the 30x30 fieldset grid and 1m depth
+lat_release0 = np.tile(np.linspace(30,39,10),[10,1])
+lat_release = lat_release0.T
+lon_release = np.tile(np.linspace(-165,-156,10),[10,1])
+z_release = np.tile(1,[10,10])
+
+# Choose:
+simdays = 50.0 * 365.0
+#simdays = 5
+time0 = 0
+simhours = 1
+simmins = 30
+secsdt = 30
+hrsoutdt = 5
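+# simdays: total simulated time (50 * 365 days); secsdt and hrsoutdt are, by their names, the
+# integration time step in seconds and the output interval in hours used when setting up the run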
+
+#--------- Choose below: NOTE- MUST ALSO MANUALLY CHANGE IT IN THE KOOI KERNEL BELOW -----
+rho_pl = 920. # density of plastic (kg m-3): DEFAULT FOR FIG 1 in Kooi: 920 but full range is: 840, 920, 940, 1050, 1380 (last 2 are initially non-buoyant)
+r_pl = "1e-04" # radius of plastic (m): DEFAULT FOR FIG 1: 10-3 to 10-6 included but full range is: 10 mm to 0.1 um or 10-2 to 10-7
+
+
+def Kooi(particle,fieldset,time):
+ #------ CHOOSE -----
+ rho_pl = 920. # density of plastic (kg m-3): DEFAULT FOR FIG 1: 920 but full range is: 840, 920, 940, 1050, 1380 (last 2 are initially non-buoyant)
+ r_pl = 1e-04 # radius of plastic (m): DEFAULT FOR FIG 1: 10-3 to 10-6 included but full range is: 10 mm to 0.1 um or 10-2 to 10-7
+
+ # Nitrogen to cell ratios for ambient algal concentrations ('aa') and algal growth ('mu_aa') from NEMO output (no longer using N:C:AA (Redfield ratio), directly N:AA from Menden-Deuer and Lessard 2000)
+ min_N2cell = 2656.0e-09 #[mgN cell-1] (from Menden-Deuer and Lessard 2000)
+ max_N2cell = 11.0e-09 #[mgN cell-1]
+ med_N2cell = 356.04e-09 #[mgN cell-1] THIS is used below
+
+ # Ambient algal concentration from MEDUSA's non-diatom + diatom phytoplankton
+ n0 = particle.nd_phy+particle.d_phy # [mmol N m-3] in MEDUSA
+ n = n0*14.007 # conversion from [mmol N m-3] to [mg N m-3] (atomic weight of 1 mol of N = 14.007 g)
+ n2 = n/med_N2cell # conversion from [mg N m-3] to [no. m-3]
+
+ if n2<0.:
+ aa = 0.
+ else:
+ aa = n2 # [no m-3] to compare to Kooi model
+
+ # Primary productivity (algal growth) only above euphotic zone, condition same as in Kooi et al. 2017
+ if particle.depth 5e9:
+ w = 1000.
+ elif dn <0.05:
+ w = (d**2.) *1.71E-4
+ else:
+ w = 10.**(-3.76715 + (1.92944*math.log10(d)) - (0.09815*math.log10(d)**2.) - (0.00575*math.log10(d)**3.) + (0.00056*math.log10(d)**4.))
+
+ if z >= 4000.:
+ vs = 0
+ elif z < 1. and delta_rho < 0:
+ vs = 0
+ elif delta_rho > 0:
+ vs = (g * kin_visc * w * delta_rho)**(1./3.)
+ else:
+ a_del_rho = delta_rho*-1.
+ vs = -1.*(g * kin_visc * w * a_del_rho)**(1./3.) # m s-1
+
+ particle.depth += vs * particle.dt
+ particle.vs = vs
+ z = particle.depth
+ dt = particle.dt
+
+""" Defining the particle class """
+
+class plastic_particle(JITParticle): #ScipyParticle): #
+ u = Variable('u', dtype=np.float32,to_write=False)
+ v = Variable('v', dtype=np.float32,to_write=False)
+ w = Variable('w', dtype=np.float32,to_write=False)
+ temp = Variable('temp',dtype=np.float32,to_write=False)
+ density = Variable('density',dtype=np.float32,to_write=True)
+ #aa = Variable('aa',dtype=np.float32,to_write=True)
+ #d_tpp = Variable('d_tpp',dtype=np.float32,to_write=False) # mu_aa
+ #nd_tpp = Variable('nd_tpp',dtype=np.float32,to_write=False)
+ tpp3 = Variable('tpp3',dtype=np.float32,to_write=False)
+ euph_z = Variable('euph_z',dtype=np.float32,to_write=False)
+ d_phy = Variable('d_phy',dtype=np.float32,to_write=False)
+ nd_phy = Variable('nd_phy',dtype=np.float32,to_write=False)
+ kin_visc = Variable('kin_visc',dtype=np.float32,to_write=False)
+ sw_visc = Variable('sw_visc',dtype=np.float32,to_write=False)
+ a = Variable('a',dtype=np.float32,to_write=False)
+ vs = Variable('vs',dtype=np.float32,to_write=True)
+
+"""functions and kernals"""
+
+def DeleteParticle(particle, fieldset, time):
+ """Kernel for deleting particles if they are out of bounds."""
+ # print('particle is deleted') #print(particle.lon, particle.lat, particle.depth)
+ particle.delete()
+
+def perIterGC():
+ gc.collect()
+
+def getclosest_ij(lats,lons,latpt,lonpt):
+ """Function to find the index of the closest point to a certain lon/lat value."""
+ dist_sq = (lats-latpt)**2 + (lons-lonpt)**2 # find squared distance of every point on grid
+ minindex_flattened = dist_sq.argmin() # 1D index of minimum dist_sq element
+ return np.unravel_index(minindex_flattened, lats.shape) # Get 2D index for latvals and lonvals arrays from 1D index
+
+def AdvectionRK4_3D_vert(particle, fieldset, time):  # adaptation of the AdvectionRK4_3D kernel using only the vertical velocity
+    """Advection of particles using fourth-order Runge-Kutta integration of the vertical velocity only.
+    Function needs to be converted to a Kernel object before execution."""
+ (w1) = fieldset.W[time, particle.depth, particle.lat, particle.lon]
+ #lon1 = particle.lon + u1*.5*particle.dt
+ #lat1 = particle.lat + v1*.5*particle.dt
+ dep1 = particle.depth + w1*.5*particle.dt
+ (w2) = fieldset.W[time + .5 * particle.dt, dep1, particle.lat, particle.lon]
+ #lon2 = particle.lon + u2*.5*particle.dt
+ #lat2 = particle.lat + v2*.5*particle.dt
+ dep2 = particle.depth + w2*.5*particle.dt
+ (w3) = fieldset.W[time + .5 * particle.dt, dep2, particle.lat, particle.lon]
+ #lon3 = particle.lon + u3*particle.dt
+ #lat3 = particle.lat + v3*particle.dt
+ dep3 = particle.depth + w3*particle.dt
+ (w4) = fieldset.W[time + particle.dt, dep3, particle.lat, particle.lon]
+ #particle.lon += particle.lon #(u1 + 2*u2 + 2*u3 + u4) / 6. * particle.dt
+ #particle.lat += particle.lat #lats[1,1] #(v1 + 2*v2 + 2*v3 + v4) / 6. * particle.dt
+ particle.depth += (w1 + 2*w2 + 2*w3 + w4) / 6. * particle.dt
+
+def Profiles(particle, fieldset, time):
+ particle.temp = fieldset.cons_temperature[time, particle.depth,particle.lat,particle.lon]
+ particle.d_phy= fieldset.d_phy[time, particle.depth,particle.lat,particle.lon]
+ particle.nd_phy= fieldset.nd_phy[time, particle.depth,particle.lat,particle.lon]
+ #particle.d_tpp = fieldset.d_tpp[time,particle.depth,particle.lat,particle.lon]
+ #particle.nd_tpp = fieldset.nd_tpp[time,particle.depth,particle.lat,particle.lon]
+ particle.tpp3 = fieldset.tpp3[time,particle.depth,particle.lat,particle.lon]
+ particle.euph_z = fieldset.euph_z[time,particle.depth,particle.lat,particle.lon]
+ particle.kin_visc = fieldset.KV[time,particle.depth,particle.lat,particle.lon]
+ particle.sw_visc = fieldset.SV[time,particle.depth,particle.lat,particle.lon]
+ particle.w = fieldset.W[time,particle.depth,particle.lat,particle.lon]
+
+if __name__ == "__main__":
+    parser = ArgumentParser(description="Benchmark of 3D plastic particle sinking/deep migration in the North Pacific using NEMO-MEDUSA fields")
+ parser.add_argument("-i", "--imageFileName", dest="imageFileName", type=str, default="mpiChunking_plot_MPI.png", help="image file name of the plot")
+ parser.add_argument("-p", "--periodic", dest="periodic", action='store_true', default=False, help="enable/disable periodic wrapping (else: extrapolation)")
+ parser.add_argument("-w", "--writeout", dest="write_out", action='store_true', default=False, help="write data in outfile")
+ # parser.add_argument("-t", "--time_in_days", dest="time_in_days", type=int, default=365, help="runtime in days (default: 365)")
+ parser.add_argument("-t", "--time_in_days", dest="time_in_days", type=str, default="1*365", help="runtime in days (default: 1*365)")
+ parser.add_argument("-G", "--GC", dest="useGC", action='store_true', default=False, help="using a garbage collector (default: false)")
+ args = parser.parse_args()
+
+ imageFileName=args.imageFileName
+ periodicFlag=args.periodic
+ time_in_days = int(float(eval(args.time_in_days)))
+ with_GC = args.useGC
+
+ branch = "soa_benchmark"
+ computer_env = "local/unspecified"
+ scenario = "deep_migration"
+ headdir = ""
+ odir = ""
+ datahead = ""
+ dirread_top = ""
+ dirread_top_bgc = ""
+ dirread_mesh = ""
+ if os.uname()[1] in ['science-bs35', 'science-bs36']: # Gemini
+ # headdir = "/scratch/{}/experiments/palaeo-parcels".format(os.environ['USER'])
+ headdir = "/scratch/{}/experiments/deep_migration_behaviour".format("ckehl")
+ odir = os.path.join(headdir,"BENCHres")
+ datahead = "/data/oceanparcels/input_data"
+ dirread = os.path.join(datahead, 'NEMO-MEDUSA/ORCA0083-N006/means/')
+ dirread_bgc = os.path.join(datahead, 'NEMO-MEDUSA/ORCA0083-N006/means/')
+ dirread_mesh = os.path.join(datahead, 'NEMO-MEDUSA/ORCA0083-N006/domain/')
+ computer_env = "Gemini"
+ elif fnmatch.fnmatchcase(os.uname()[1], "*.bullx*"): # Cartesius
+ CARTESIUS_SCRATCH_USERNAME = 'ckehluu'
+ headdir = "/scratch/shared/{}/experiments/deep_migration_behaviour".format(CARTESIUS_SCRATCH_USERNAME)
+ odir = os.path.join(headdir, "BENCHres")
+ datahead = "/projects/0/topios/hydrodynamic_data"
+ dirread = os.path.join(datahead, 'NEMO-MEDUSA/ORCA0083-N006/means/')
+ dirread_bgc = os.path.join(datahead, 'NEMO-MEDUSA_BGC/ORCA0083-N006/means/')
+ dirread_mesh = os.path.join(datahead, 'NEMO-MEDUSA/ORCA0083-N006/domain/')
+ computer_env = "Cartesius"
+ else:
+ headdir = "/var/scratch/dlobelle"
+ odir = os.path.join(headdir, "BENCHres")
+ datahead = "/data"
+ dirread = os.path.join(datahead, 'NEMO-MEDUSA/ORCA0083-N006/means/')
+ dirread_bgc = os.path.join(datahead, 'NEMO-MEDUSA/ORCA0083-N006/means/')
+ dirread_mesh = os.path.join(datahead, 'NEMO-MEDUSA/ORCA0083-N006/domain/')
+ print("running {} on {} (uname: {}) - branch '{}' - argv: {}".format(scenario, computer_env, os.uname()[1], branch, sys.argv[1:]))
+
+ # ==== CARTESIUS ==== #
+ # dirread = '/projects/0/topios/hydrodynamic_data/NEMO-MEDUSA/ORCA0083-N006/means/'
+ # dirread_bgc = '/projects/0/topios/hydrodynamic_data/NEMO-MEDUSA_BGC/ORCA0083-N006/means/'
+ # dirread_mesh = '/projects/0/topios/hydrodynamic_data/NEMO-MEDUSA/ORCA0083-N006/domain/'
+ # ==== GEMINI ==== #
+ # dirread = '/data/oceanparcels/input_data/NEMO-MEDUSA/ORCA0083-N006/means/'
+ # dirread_bgc = '/data/oceanparcels/input_data/NEMO-MEDUSA/ORCA0083-N006/means/'
+ # dirread_mesh = '/data/oceanparcels/input_data/NEMO-MEDUSA/ORCA0083-N006/domain/'
+ # dirwrite = '/scratch/ckehl/experiments/deep_migration_behaviour/NEMOres/tests/'
+ # ==== ====== ==== #
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ # global_t_0 = ostime.time()
+ # global_t_0 = MPI.Wtime()
+ global_t_0 = ostime.process_time()
+ else:
+ # global_t_0 = ostime.time()
+ global_t_0 = ostime.process_time()
+
+ ufiles = sorted(glob(dirread+'ORCA0083-N06_2000*d05U.nc')) #0105d05
+ vfiles = sorted(glob(dirread+'ORCA0083-N06_2000*d05V.nc'))
+ wfiles = sorted(glob(dirread+'ORCA0083-N06_2000*d05W.nc'))
+ pfiles = sorted(glob(dirread_bgc+'ORCA0083-N06_2000*d05P.nc'))
+ ppfiles = sorted(glob(dirread_bgc+'ORCA0083-N06_2000*d05D.nc')) # dfiles
+ tsfiles = sorted(glob(dirread+'ORCA0083-N06_2000*d05T.nc'))
+ mesh_mask = dirread_mesh+'coordinates.nc'
+
+ filenames = {'U': {'lon': mesh_mask, 'lat': mesh_mask, 'depth': wfiles[0], 'data': ufiles}, #'depth': wfiles,
+ 'V': {'lon': mesh_mask, 'lat': mesh_mask, 'depth': wfiles[0], 'data': vfiles},
+ 'W': {'lon': mesh_mask, 'lat': mesh_mask, 'depth': wfiles[0], 'data': wfiles},
+ 'd_phy': {'lon': mesh_mask, 'lat': mesh_mask, 'depth': wfiles[0], 'data': pfiles},
+ 'nd_phy': {'lon': mesh_mask, 'lat': mesh_mask, 'depth': wfiles[0], 'data': pfiles},
+ 'euph_z': {'lon': mesh_mask, 'lat': mesh_mask, 'data': ppfiles},
+ #'d_tpp': {'lon': mesh_mask, 'lat': mesh_mask, 'data': ppfiles}, # 'depth': wfiles,
+ #'nd_tpp': {'lon': mesh_mask, 'lat': mesh_mask, 'data': ppfiles},
+ 'tpp3': {'lon': mesh_mask, 'lat': mesh_mask, 'depth': wfiles[0], 'data': ppfiles},
+ 'cons_temperature': {'lon': mesh_mask, 'lat': mesh_mask, 'depth': wfiles[0], 'data': tsfiles},
+ 'abs_salinity': {'lon': mesh_mask, 'lat': mesh_mask, 'depth': wfiles[0], 'data': tsfiles}}
+
+ variables = {'U': 'uo',
+ 'V': 'vo',
+ 'W': 'wo',
+ 'd_phy': 'PHD',
+ 'nd_phy': 'PHN',
+ 'euph_z': 'MED_XZE',
+ #'d_tpp': 'ML_PRD', # units: mmolN/m2/d
+ #'nd_tpp': 'ML_PRN', # units: mmolN/m2/d
+ 'tpp3': 'TPP3', # units: mmolN/m3/d
+ 'cons_temperature': 'potemp',
+ 'abs_salinity': 'salin'}
+
+ dimensions = {'U': {'lon': 'glamf', 'lat': 'gphif', 'depth': 'depthw', 'time': 'time_counter'}, #time_centered
+ 'V': {'lon': 'glamf', 'lat': 'gphif', 'depth': 'depthw', 'time': 'time_counter'},
+ 'W': {'lon': 'glamf', 'lat': 'gphif', 'depth': 'depthw', 'time': 'time_counter'},
+ 'd_phy': {'lon': 'glamf', 'lat': 'gphif', 'depth': 'depthw','time': 'time_counter'},
+ 'nd_phy': {'lon': 'glamf', 'lat': 'gphif', 'depth': 'depthw','time': 'time_counter'},
+ 'euph_z': {'lon': 'glamf', 'lat': 'gphif','time': 'time_counter'},
+ #'d_tpp': {'lon': 'glamf', 'lat': 'gphif','time': 'time_counter'}, # 'depth': 'depthw',
+ #'nd_tpp': {'lon': 'glamf', 'lat': 'gphif','time': 'time_counter'},
+ 'tpp3': {'lon': 'glamf', 'lat': 'gphif','depth': 'depthw', 'time': 'time_counter'},
+ 'cons_temperature': {'lon': 'glamf', 'lat': 'gphif', 'depth': 'depthw','time': 'time_counter'},
+ 'abs_salinity': {'lon': 'glamf', 'lat': 'gphif', 'depth': 'depthw','time': 'time_counter'}}
+
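+    # Two alternative chunking specifications: 'chs' follows the older 'field_chunksize' API
+    # (dict of dimension-name -> chunk length), 'nchs' the newer per-variable 'chunksize' layout;
+    # the try/except below uses whichever keyword the installed Parcels version accepts.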
+ chs = {'time_counter': 1, 'depthu': 75, 'depthv': 75, 'depthw': 75, 'deptht': 75, 'y': 200, 'x': 200}
+ nchs = {
+ 'U': {'lon': ('x', 96), 'lat': ('y', 48), 'depth': ('depthu', 25), 'time': ('time_counter', 1)},
+ 'V': {'lon': ('x', 96), 'lat': ('y', 48), 'depth': ('depthv', 25), 'time': ('time_counter', 1)},
+ 'W': {'lon': ('x', 96), 'lat': ('y', 48), 'depth': ('depthw', 25), 'time': ('time_counter', 1)},
+ 'd_phy': {'lon': ('x', 96), 'lat': ('y', 48), 'depth': ('deptht', 25), 'time': ('time_counter', 1)}, # pfiles
+ 'nd_phy': {'lon': ('x', 96), 'lat': ('y', 48), 'depth': ('deptht', 25), 'time': ('time_counter', 1)}, # pfiles
+ 'euph_z': {'lon': ('x', 96), 'lat': ('y', 48), 'depth': ('deptht', 25), 'time': ('time_counter', 1)}, # dfiles
+ # 'd_tpp': {'lon': ('x', 96), 'lat': ('y', 48), 'time': ('time_counter', 1)}, # dfiles
+ # 'nd_tpp': {'lon': ('x', 96), 'lat': ('y', 48), 'time': ('time_counter', 1)}, # dfiles
+ 'tpp3': {'lon': ('x', 96), 'lat': ('y', 48), 'depth': ('deptht', 25), 'time': ('time_counter', 1)}, # dfiles
+ 'cons_temperature': {'lon': ('x', 96), 'lat': ('y', 48), 'depth': ('deptht', 25), 'time': ('time_counter', 1)}, # tfiles
+ 'abs_salinity': {'lon': ('x', 96), 'lat': ('y', 48), 'depth': ('deptht', 25), 'time': ('time_counter', 1)}, # tfiles
+ }
+ try:
+ fieldset = FieldSet.from_nemo(filenames, variables, dimensions, allow_time_extrapolation=False, field_chunksize=chs, time_periodic=delta(days=365))
+    except (SyntaxError, TypeError):
+ fieldset = FieldSet.from_nemo(filenames, variables, dimensions, allow_time_extrapolation=False, chunksize=nchs, time_periodic=delta(days=365))
+ depths = fieldset.U.depth
+
+ outfile = 'Kooi+NEMO_3D_grid10by10_rho'+str(int(rho_pl))+'_r'+ r_pl+'_'+str(simdays)+'days_'+str(secsdt)+'dtsecs_'+str(hrsoutdt)+'hrsoutdt'
+ dirwrite = os.path.join(odir, "rho_"+str(int(rho_pl))+"kgm-3")
+ if not os.path.exists(dirwrite):
+ os.mkdir(dirwrite)
+
+    # Kinematic and dynamic viscosity are not available in MEDUSA, so Kooi's profiles are replicated at all grid points
+ # profile_auxin_path = '/home/dlobelle/Kooi_data/data_input/profiles.pickle'
+ # profile_auxin_path = '/scratch/ckehl/experiments/deep_migration_behaviour/aux_in/profiles.pickle'
+ profile_auxin_path = os.path.join(headdir, 'aux_in/profiles.pickle')
+ with open(profile_auxin_path, 'rb') as f:
+ depth,T_z,S_z,rho_z,upsilon_z,mu_z = pickle.load(f)
+
+ v_lon = np.array([minlon,maxlon])
+ v_lat = np.array([minlat,maxlat])
+
+ print("|lon| = {}; |lat| = {}".format(v_lon.shape[0], v_lat.shape[0]))
+
+ kv_or = np.transpose(np.tile(np.array(upsilon_z),(v_lon.shape[0],v_lat.shape[0],1)), (2,0,1)) # kinematic viscosity
+ sv_or = np.transpose(np.tile(np.array(mu_z),(v_lon.shape[0],v_lat.shape[0],1)), (2,0,1)) # dynamic viscosity of seawater
+ try:
+ KV = Field('KV', kv_or, lon=v_lon, lat=v_lat, depth=depths, mesh='spherical', field_chunksize=False)#,transpose="True") #,fieldtype='U')
+ SV = Field('SV', sv_or, lon=v_lon, lat=v_lat, depth=depths, mesh='spherical', field_chunksize=False)#,transpose="True") #,fieldtype='U')
+    except (SyntaxError, TypeError):
+ KV = Field('KV', kv_or, lon=v_lon, lat=v_lat, depth=depths, mesh='spherical', chunksize=False)#,transpose="True") #,fieldtype='U')
+ SV = Field('SV', sv_or, lon=v_lon, lat=v_lat, depth=depths, mesh='spherical', chunksize=False)#,transpose="True") #,fieldtype='U')
+ fieldset.add_field(KV, 'KV')
+ fieldset.add_field(SV, 'SV')
+
+ """ Defining the particle set """
+ pset = ParticleSet_Benchmark.from_list(fieldset=fieldset, # the fields on which the particles are advected
+ pclass=plastic_particle, # the type of particles (JITParticle or ScipyParticle)
+ lon= lon_release, #-160., # a vector of release longitudes
+ lat= lat_release, #36.,
+ time = time0,
+ depth = z_release) #[1.]
+
+ # perflog = PerformanceLog()
+ # perflog.pset = pset
+ #postProcessFuncs = [perflog.advance,]
+
+ """ Kernal + Execution"""
+ postProcessFuncs = []
+ if with_GC:
+ postProcessFuncs.append(perIterGC)
+ output_fpath = None
+ if args.write_out:
+ output_fpath = os.path.join(dirwrite, outfile)
+ # kernels = pset.Kernel(AdvectionRK4_3D) + pset.Kernel(seawaterdensity.polyTEOS10_bsq) + pset.Kernel(Profiles) + pset.Kernel(Kooi)
+ kernels = pset.Kernel(AdvectionRK4_3D_vert) + pset.Kernel(seawaterdensity.PolyTEOS10_bsq) + pset.Kernel(Profiles) + pset.Kernel(Kooi)
+ pfile= pset.ParticleFile(output_fpath, outputdt=delta(hours = hrsoutdt))
+
+ starttime = 0
+ endtime = 0
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ # global_t_0 = ostime.time()
+ # starttime = MPI.Wtime()
+ starttime = ostime.process_time()
+ else:
+ #starttime = ostime.time()
+ starttime = ostime.process_time()
+
+ # postIterationCallbacks = postProcessFuncs, callbackdt = delta(hours=hrsoutdt)
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(seconds = secsdt), output_file=pfile, verbose_progress=True, recovery={ErrorCode.ErrorOutOfBounds: DeleteParticle}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours = hrsoutdt))
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ # global_t_0 = ostime.time()
+ # endtime = MPI.Wtime()
+ endtime = ostime.process_time()
+ else:
+ # endtime = ostime.time()
+ endtime = ostime.process_time()
+
+ if args.write_out:
+ pfile.close()
+
+ size_Npart = len(pset.nparticle_log)
+ Npart = pset.nparticle_log.get_param(size_Npart-1)
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_comm.Barrier()
+ Npart = mpi_comm.reduce(Npart, op=MPI.SUM, root=0)
+ if mpi_comm.Get_rank() == 0:
+ if size_Npart>0:
+ sys.stdout.write("final # particles: {}\n".format( Npart ))
+ sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime-starttime))
+ avg_time = np.mean(np.array(pset.total_log.get_values(), dtype=np.float64))
+ sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time*1000.0))
+ else:
+ if size_Npart > 0:
+ sys.stdout.write("final # particles: {}\n".format( Npart ))
+ sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime - starttime))
+ avg_time = np.mean(np.array(pset.total_log.get_values(), dtype=np.float64))
+ sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time * 1000.0))
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ # mpi_comm.Barrier()
+ Nparticles = mpi_comm.reduce(np.array(pset.nparticle_log.get_params()), op=MPI.SUM, root=0)
+ Nmem = mpi_comm.reduce(np.array(pset.mem_log.get_params()), op=MPI.SUM, root=0)
+ if mpi_comm.Get_rank() == 0:
+ pset.plot_and_log(memory_used=Nmem, nparticles=Nparticles, target_N=1, imageFilePath=imageFileName, odir=odir)
+ else:
+ pset.plot_and_log(target_N=1, imageFilePath=imageFileName, odir=odir)
+
+ print('Execution finished')
diff --git a/performance/benchmark_doublegyre.py b/performance/benchmark_doublegyre.py
new file mode 100644
index 0000000000..12dcc6cbc9
--- /dev/null
+++ b/performance/benchmark_doublegyre.py
@@ -0,0 +1,429 @@
+"""
+Author: Dr. Christian Kehl
+Date: 11-02-2020
+"""
+
+from parcels import AdvectionEE, AdvectionRK45, AdvectionRK4
+from parcels import FieldSet, ScipyParticle, JITParticle, Variable, AdvectionRK4, StateCode, OperationCode, ErrorCode
+from parcels.particleset_benchmark import ParticleSet_Benchmark as BenchmarkParticleSet
+from parcels.particleset import ParticleSet as DryParticleSet
+from parcels.field import Field, VectorField, NestedField, SummedField
+# from parcels import plotTrajectoriesFile_loadedField
+# from parcels import rng as random
+from parcels import ParcelsRandom
+from datetime import timedelta as delta
+import math
+from argparse import ArgumentParser
+import datetime
+import numpy as np
+import xarray as xr
+import fnmatch
+import sys
+import gc
+import os
+import time as ostime
+# import matplotlib.pyplot as plt
+from parcels.tools import perlin3d
+from parcels.tools import perlin2d
+
+try:
+ from mpi4py import MPI
+except:
+ MPI = None
+with_GC = False
+
+pset = None
+ptype = {'scipy': ScipyParticle, 'jit': JITParticle}
+method = {'RK4': AdvectionRK4, 'EE': AdvectionEE, 'RK45': AdvectionRK45}
+global_t_0 = 0
+Nparticle = int(math.pow(2,10)) # equals Nparticle = 1024
+#Nparticle = int(math.pow(2,11)) # equals Nparticle = 2048
+#Nparticle = int(math.pow(2,12)) # equals Nparticle = 4096
+#Nparticle = int(math.pow(2,13)) # equals Nparticle = 8192
+#Nparticle = int(math.pow(2,14)) # equals Nparticle = 16384
+#Nparticle = int(math.pow(2,15)) # equals Nparticle = 32768
+#Nparticle = int(math.pow(2,16)) # equals Nparticle = 65536
+#Nparticle = int(math.pow(2,17)) # equals Nparticle = 131072
+#Nparticle = int(math.pow(2,18)) # equals Nparticle = 262144
+#Nparticle = int(math.pow(2,19)) # equals Nparticle = 524288
+
+#a = 1000 * 1e3
+#b = 1000 * 1e3
+#scalefac = 2.0
+#tsteps = 61
+#tscale = 6
+
+a = 9.6 * 1e3 # [a = 9.6 km, expressed in m]
+b = 4.8 * 1e3 # [b = 4.8 km, expressed in m]
+tsteps = 122 # in steps
+tstepsize = 6.0 # unitary
+tscale = 12.0*60.0*60.0 # in seconds
+# time[days] = (tsteps * tstepsize) * tscale
+# gyre_rotation_speed = 60.0*24.0*60.0*60.0 # assume 1 rotation every 8.5 weeks
+gyre_rotation_speed = (366.0*24.0*60.0*60.0)/2.0 # assume 1 rotation every half year (183 days)
+# scalefac = 2.0 * 6.0 # scaling the advection speeds to 12 m/s
+scalefac = (40.0 / (1000.0/60.0)) # 40 km/h
+scalefac /= 1000.0
+
+# we need to modify the kernel.execute / pset.execute so that it returns from the JIT
+# in a given time WITHOUT writing to disk via outfile => introduce a pyloop_dt
+
+def DeleteParticle(particle, fieldset, time):
+ particle.delete()
+
+def RenewParticle(particle, fieldset, time):
+ particle.lat = np.random.rand() * (-a) + (a/2.0)
+ particle.lon = np.random.rand() * (-b) + (b/2.0)
+
+def perIterGC():
+ gc.collect()
+
+def doublegyre_from_numpy(xdim=960, ydim=480, periodic_wrap=False, write_out=False, steady=False):
+ """Implemented following Froyland and Padberg (2009), 10.1016/j.physd.2009.03.002"""
+ A = 0.3
+ epsilon = 0.25
+ omega = 2 * np.pi
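+    # Standard double-gyre velocity field (cf. the Froyland & Padberg reference above):
+    #   f(x, t)   = eps * sin(phi(t)) * x^2 + (1 - 2 * eps * sin(phi(t))) * x
+    #   u(x, y, t) = -pi * A * sin(pi * f(x, t)) * cos(pi * y)
+    #   v(x, y, t) =  pi * A * cos(pi * f(x, t)) * sin(pi * y) * df/dx
+    # with x in [0, 2], y in [0, 1] and phi(t) the phase stored in 'freqs' below; the nested loops
+    # evaluate these expressions on the (time, lat, lon) grid before scaling by 'scalefac'.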
+
+
+ lon = np.linspace(-a*0.5, a*0.5, xdim, dtype=np.float32)
+ lonrange = lon.max()-lon.min()
+ sys.stdout.write("lon field: {}\n".format(lon.size))
+ lat = np.linspace(-b*0.5, b*0.5, ydim, dtype=np.float32)
+ latrange = lat.max() - lat.min()
+ sys.stdout.write("lat field: {}\n".format(lat.size))
+ totime = (tsteps * tstepsize) * tscale
+ times = np.linspace(0., totime, tsteps, dtype=np.float64)
+ sys.stdout.write("time field: {}\n".format(times.size))
+ dx, dy = lon[2]-lon[1], lat[2]-lat[1]
+
+ U = np.zeros((times.size, lat.size, lon.size), dtype=np.float32)
+ V = np.zeros((times.size, lat.size, lon.size), dtype=np.float32)
+ freqs = np.ones(times.size, dtype=np.float32)
+ if not steady:
+ for ti in range(times.shape[0]):
+ time_f = times[ti] / gyre_rotation_speed
+ # time_f = np.fmod((times[ti])/gyre_rotation_speed, 2.0)
+ # time_f = np.fmod((times[ti]/gyre_rotation_speed), 2*np.pi)
+ # freqs[ti] = omega * np.cos(time_f) * 2.0
+ freqs[ti] *= omega * time_f
+ # freqs[ti] *= time_f
+ else:
+ freqs = (freqs * 0.5) * omega
+
+ for ti in range(times.shape[0]):
+ freq = freqs[ti]
+ # print(freq)
+ for i in range(lon.shape[0]):
+ for j in range(lat.shape[0]):
+ x1 = ((lon[i]*2.0 + a) / a) # - dx / 2
+ x2 = ((lat[j]*2.0 + b) / (2.0*b)) # - dy / 2
+ f_xt = (epsilon * np.sin(freq) * x1**2.0) + (1.0 - (2.0 * epsilon * np.sin(freq))) * x1
+ U[ti, j, i] = -np.pi * A * np.sin(np.pi * f_xt) * np.cos(np.pi * x2)
+ V[ti, j, i] = np.pi * A * np.cos(np.pi * f_xt) * np.sin(np.pi * x2) * (2 * epsilon * np.sin(freq) * x1 + 1 - 2 * epsilon * np.sin(freq))
+
+ U *= scalefac
+ # U = np.transpose(U, (0, 2, 1))
+ V *= scalefac
+ # V = np.transpose(V, (0, 2, 1))
+
+
+ data = {'U': U, 'V': V}
+ dimensions = {'time': times, 'lon': lon, 'lat': lat}
+ fieldset = None
+ if periodic_wrap:
+ fieldset = FieldSet.from_data(data, dimensions, mesh='flat', transpose=False, time_periodic=delta(days=1))
+ else:
+ fieldset = FieldSet.from_data(data, dimensions, mesh='flat', transpose=False, allow_time_extrapolation=True)
+ if write_out:
+ fieldset.write(filename=write_out)
+ return fieldset
+
+
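+# JIT and SciPy variants of a particle that tracks its age and carries a (randomly assigned)
+# life expectancy plus an initialisation flag.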
+class AgeParticle_JIT(JITParticle):
+ age = Variable('age', dtype=np.float64, initial=0.0)
+ life_expectancy = Variable('life_expectancy', dtype=np.float64, initial=np.finfo(np.float64).max)
+ initialized_dynamic = Variable('initialized_dynamic', dtype=np.int32, initial=0)
+
+class AgeParticle_SciPy(ScipyParticle):
+ age = Variable('age', dtype=np.float64, initial=0.0)
+ life_expectancy = Variable('life_expectancy', dtype=np.float64, initial=np.finfo(np.float64).max)
+ initialized_dynamic = Variable('initialized_dynamic', dtype=np.int32, initial=0)
+
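+# On the first evaluation of each particle, draw a random life expectancy; the Age kernel below
+# then increments the age every time step and deletes the particle once it exceeds that value.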
+def initialize(particle, fieldset, time):
+ if particle.initialized_dynamic < 1:
+ np_scaler = math.sqrt(3.0 / 2.0)
+ particle.life_expectancy = time + ParcelsRandom.uniform(.0, (fieldset.life_expectancy-time) * 2.0 / np_scaler)
+ # particle.life_expectancy = time + ParcelsRandom.uniform(.0, (fieldset.life_expectancy-time)*math.sqrt(3.0 / 2.0))
+ # particle.life_expectancy = time + ParcelsRandom.uniform(.0, fieldset.life_expectancy) * math.sqrt(3.0 / 2.0)
+ # particle.life_expectancy = time+ParcelsRandom.uniform(.0, fieldset.life_expectancy) * ((3.0/2.0)**2.0)
+ particle.initialized_dynamic = 1
+
+def Age(particle, fieldset, time):
+ if particle.state == StateCode.Evaluate:
+ particle.age = particle.age + math.fabs(particle.dt)
+ if particle.age > particle.life_expectancy:
+ particle.delete()
+
+age_ptype = {'scipy': AgeParticle_SciPy, 'jit': AgeParticle_JIT}
+
+if __name__=='__main__':
+    parser = ArgumentParser(description="Benchmark of particle advection in an idealised double-gyre flow field")
+ parser.add_argument("-i", "--imageFileName", dest="imageFileName", type=str, default="mpiChunking_plot_MPI.png", help="image file name of the plot")
+ parser.add_argument("-b", "--backwards", dest="backwards", action='store_true', default=False, help="enable/disable running the simulation backwards")
+ parser.add_argument("-p", "--periodic", dest="periodic", action='store_true', default=False, help="enable/disable periodic wrapping (else: extrapolation)")
+ parser.add_argument("-r", "--release", dest="release", action='store_true', default=False, help="continuously add particles via repeatdt (default: False)")
+ parser.add_argument("-rt", "--releasetime", dest="repeatdt", type=int, default=720, help="repeating release rate of added particles in Minutes (default: 720min = 12h)")
+ parser.add_argument("-a", "--aging", dest="aging", action='store_true', default=False, help="Removed aging particles dynamically (default: False)")
+ parser.add_argument("-t", "--time_in_days", dest="time_in_days", type=int, default=1, help="runtime in days (default: 1)")
+ parser.add_argument("-x", "--xarray", dest="use_xarray", action='store_true', default=False, help="use xarray as data backend")
+ parser.add_argument("-w", "--writeout", dest="write_out", action='store_true', default=False, help="write data in outfile")
+ parser.add_argument("-d", "--delParticle", dest="delete_particle", action='store_true', default=False, help="switch to delete a particle (True) or reset a particle (default: False).")
+ parser.add_argument("-A", "--animate", dest="animate", action='store_true', default=False, help="animate the particle trajectories during the run or not (default: False).")
+ parser.add_argument("-V", "--visualize", dest="visualize", action='store_true', default=False, help="Visualize particle trajectories at the end (default: False). Requires -w in addition to take effect.")
+ parser.add_argument("-N", "--n_particles", dest="nparticles", type=str, default="2**6", help="number of particles to generate and advect (default: 2e6)")
+ parser.add_argument("-sN", "--start_n_particles", dest="start_nparticles", type=str, default="96", help="(optional) number of particles generated per release cycle (if --rt is set) (default: 96)")
+ parser.add_argument("-m", "--mode", dest="compute_mode", choices=['jit','scipy'], default="jit", help="computation mode = [JIT, SciPp]")
+ parser.add_argument("-G", "--GC", dest="useGC", action='store_true', default=False, help="using a garbage collector (default: false)")
+ parser.add_argument("--dry", dest="dryrun", action="store_true", default=False, help="Start dry run (no benchmarking and its classes")
+ args = parser.parse_args()
+
+ ParticleSet = BenchmarkParticleSet
+ if args.dryrun:
+ ParticleSet = DryParticleSet
+
+ imageFileName=args.imageFileName
+ periodicFlag=args.periodic
+ backwardSimulation = args.backwards
+ repeatdtFlag=args.release
+ repeatRateMinutes=args.repeatdt
+ time_in_days = args.time_in_days
+ use_xarray = args.use_xarray
+ agingParticles = args.aging
+ with_GC = args.useGC
+ Nparticle = int(float(eval(args.nparticles)))
+ target_N = Nparticle
+ addParticleN = 1
+ # np_scaler = math.sqrt(3.0/2.0)
+ # np_scaler = (3.0 / 2.0)**2.0 # **
+ np_scaler = 3.0 / 2.0
+ # cycle_scaler = math.sqrt(3.0/2.0)
+ # cycle_scaler = (3.0 / 2.0)**2.0 # **
+ # cycle_scaler = 3.0 / 2.0
+ cycle_scaler = 7.0 / 4.0
+ start_N_particles = int(float(eval(args.start_nparticles)))
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ if mpi_comm.Get_rank() == 0:
+ if agingParticles and not repeatdtFlag:
+ sys.stdout.write("N: {} ( {} )\n".format(Nparticle, int(Nparticle * np_scaler)))
+ else:
+ sys.stdout.write("N: {}\n".format(Nparticle))
+ else:
+ if agingParticles and not repeatdtFlag:
+ sys.stdout.write("N: {} ( {} )\n".format(Nparticle, int(Nparticle * np_scaler)))
+ else:
+ sys.stdout.write("N: {}\n".format(Nparticle))
+
+ dt_minutes = 60
+ #dt_minutes = 20
+ #random.seed(123456)
+ nowtime = datetime.datetime.now()
+ ParcelsRandom.seed(nowtime.microsecond)
+
+ branch = "soa_benchmark"
+ computer_env = "local/unspecified"
+ scenario = "doublegyre"
+ odir = ""
+ if os.uname()[1] in ['science-bs35', 'science-bs36']: # Gemini
+ # odir = "/scratch/{}/experiments".format(os.environ['USER'])
+ odir = "/scratch/{}/experiments".format("ckehl")
+ computer_env = "Gemini"
+ elif fnmatch.fnmatchcase(os.uname()[1], "*.bullx*"): # Cartesius
+ CARTESIUS_SCRATCH_USERNAME = 'ckehluu'
+ odir = "/scratch/shared/{}/experiments".format(CARTESIUS_SCRATCH_USERNAME)
+ computer_env = "Cartesius"
+ else:
+ odir = "/var/scratch/experiments"
+ print("running {} on {} (uname: {}) - branch '{}' - (target) N: {} - argv: {}".format(scenario, computer_env, os.uname()[1], branch, target_N, sys.argv[1:]))
+
+ if os.path.sep in imageFileName:
+ head_dir = os.path.dirname(imageFileName)
+ if head_dir[0] == os.path.sep:
+ odir = head_dir
+ else:
+ odir = os.path.join(odir, head_dir)
+ imageFileName = os.path.split(imageFileName)[1]
+
+ func_time = []
+ mem_used_GB = []
+
+ np.random.seed(0)
+ fieldset = None
+ if use_xarray:
+ fieldset = doublegyre_from_numpy(periodic_wrap=periodicFlag)
+ else:
+ field_fpath = False
+ if args.write_out:
+ field_fpath = os.path.join(odir,"doublegyre")
+ # fieldset = doublegyre_from_numpy(xdim=240, ydim=120, periodic_wrap=periodicFlag, write_out=field_fpath)
+ fieldset = doublegyre_from_numpy(xdim=960, ydim=480, periodic_wrap=periodicFlag, write_out=field_fpath)
+
+    if args.compute_mode == 'scipy':
+ Nparticle = 2**10
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ global_t_0 = ostime.process_time()
+ else:
+ global_t_0 = ostime.process_time()
+
+ simStart = None
+ for f in fieldset.get_fields():
+ if type(f) in [VectorField, NestedField, SummedField]: # or not f.grid.defer_load
+ continue
+ else:
+ if backwardSimulation:
+ simStart=f.grid.time_full[-1]
+ else:
+ simStart = f.grid.time_full[0]
+ break
+
+ if agingParticles:
+ if not repeatdtFlag:
+ Nparticle = int(Nparticle * np_scaler)
+ fieldset.add_constant('life_expectancy', delta(days=time_in_days).total_seconds())
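+    # With repeated release (-r), half of the target particle count is added as an initial batch
+    # and batches of 'start_N_particles' are then injected every 'repeatdt'; the release interval
+    # is derived from the runtime unless -rt was changed from its 720-minute default.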
+ if repeatdtFlag:
+ addParticleN = Nparticle/2.0
+ refresh_cycle = (delta(days=time_in_days).total_seconds() / (addParticleN/start_N_particles)) / cycle_scaler
+ if agingParticles:
+ refresh_cycle /= cycle_scaler
+ repeatRateMinutes = int(refresh_cycle/60.0) if repeatRateMinutes == 720 else repeatRateMinutes
+
+ if backwardSimulation:
+ # ==== backward simulation ==== #
+ if agingParticles:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * (-a) + (a/2.0), lat=np.random.rand(start_N_particles, 1) * (-b) + (b/2.0), time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(int(addParticleN), 1) * (-a) + (a/2.0), lat=np.random.rand(int(addParticleN), 1) * (-b) + (b/2.0), time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * (-a) + (a/2.0), lat=np.random.rand(Nparticle, 1) * (-b) + (b/2.0), time=simStart)
+ else:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * (-a) + (a/2.0), lat=np.random.rand(start_N_particles, 1) * (-b) + (b/2.0), time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(int(addParticleN), 1) * (-a) + (a/2.0), lat=np.random.rand(int(addParticleN), 1) * (-b) + (b/2.0), time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * (-a) + (a/2.0), lat=np.random.rand(Nparticle, 1) * (-b) + (b/2.0), time=simStart)
+ else:
+ # ==== forward simulation ==== #
+ if agingParticles:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * (-a) + (a/2.0), lat=np.random.rand(start_N_particles, 1) * (-b) + (b/2.0), time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(int(addParticleN), 1) * (-a) + (a/2.0), lat=np.random.rand(int(addParticleN), 1) * (-b) + (b/2.0), time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * (-a) + (a/2.0), lat=np.random.rand(Nparticle, 1) * (-b) + (b/2.0), time=simStart)
+ else:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * (-a) + (a/2.0), lat=np.random.rand(start_N_particles, 1) * (-b) + (b/2.0), time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(int(addParticleN), 1) * (-a) + (a/2.0), lat=np.random.rand(int(addParticleN), 1) * (-b) + (b/2.0), time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * (-a) + (a/2.0), lat=np.random.rand(Nparticle, 1) * (-b) + (b/2.0), time=simStart)
+
+ output_file = None
+ out_fname = "benchmark_doublegyre"
+ if args.write_out:
+ if MPI and (MPI.COMM_WORLD.Get_size()>1):
+ out_fname += "_MPI"
+ else:
+ out_fname += "_noMPI"
+ if periodicFlag:
+ out_fname += "_p"
+ out_fname += "_n"+str(Nparticle)
+ if backwardSimulation:
+ out_fname += "_bwd"
+ else:
+ out_fname += "_fwd"
+ if repeatdtFlag:
+ out_fname += "_add"
+ if agingParticles:
+ out_fname += "_age"
+ output_file = pset.ParticleFile(name=os.path.join(odir,out_fname+".nc"), outputdt=delta(hours=24))
+ delete_func = RenewParticle
+ if args.delete_particle:
+ delete_func=DeleteParticle
+ postProcessFuncs = []
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ starttime = ostime.process_time()
+ else:
+ starttime = ostime.process_time()
+ kernels = pset.Kernel(AdvectionRK4,delete_cfiles=True)
+ if agingParticles:
+ kernels += pset.Kernel(initialize, delete_cfiles=True)
+ kernels += pset.Kernel(Age, delete_cfiles=True)
+ if with_GC:
+ postProcessFuncs.append(perIterGC)
+ if backwardSimulation:
+ # ==== backward simulation ==== #
+ if args.animate:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=-dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12), moviedt=delta(hours=6), movie_background_field=fieldset.U)
+ else:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=-dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12))
+ else:
+ # ==== forward simulation ==== #
+ if args.animate:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12), moviedt=delta(hours=6), movie_background_field=fieldset.U)
+ else:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12))
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ endtime = ostime.process_time()
+ else:
+ endtime = ostime.process_time()
+
+ if args.write_out:
+ output_file.close()
+
+ if not args.dryrun:
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ # mpi_comm.Barrier()
+ size_Npart = len(pset.nparticle_log)
+ Npart = pset.nparticle_log.get_param(size_Npart-1)
+ Npart = mpi_comm.reduce(Npart, op=MPI.SUM, root=0)
+ if mpi_comm.Get_rank() == 0:
+ if size_Npart>0:
+ sys.stdout.write("final # particles: {}\n".format( Npart ))
+ sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime-starttime))
+ avg_time = np.mean(np.array(pset.total_log.get_values(), dtype=np.float64))
+ sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time*1000.0))
+ else:
+ size_Npart = len(pset.nparticle_log)
+ Npart = pset.nparticle_log.get_param(size_Npart-1)
+ if size_Npart > 0:
+ sys.stdout.write("final # particles: {}\n".format( Npart ))
+ sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime - starttime))
+ avg_time = np.mean(np.array(pset.total_log.get_values(), dtype=np.float64))
+ sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time * 1000.0))
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ Nparticles = mpi_comm.reduce(np.array(pset.nparticle_log.get_params()), op=MPI.SUM, root=0)
+ Nmem = mpi_comm.reduce(np.array(pset.mem_log.get_params()), op=MPI.SUM, root=0)
+ if mpi_comm.Get_rank() == 0:
+ pset.plot_and_log(memory_used=Nmem, nparticles=Nparticles, target_N=target_N, imageFilePath=imageFileName, odir=odir, xlim_range=[0, 730], ylim_range=[0, 150])
+ else:
+ pset.plot_and_log(target_N=target_N, imageFilePath=imageFileName, odir=odir, xlim_range=[0, 730], ylim_range=[0, 150])
+
+
diff --git a/performance/benchmark_galapagos_backwards.py b/performance/benchmark_galapagos_backwards.py
new file mode 100644
index 0000000000..e8efb204e4
--- /dev/null
+++ b/performance/benchmark_galapagos_backwards.py
@@ -0,0 +1,237 @@
+from parcels import FieldSet, JITParticle, AdvectionRK4, Variable, StateCode, OperationCode, ErrorCode
+from parcels.particleset_benchmark import ParticleSet_Benchmark
+from datetime import timedelta as delta
+from glob import glob
+import numpy as np
+import itertools
+import matplotlib.pyplot as plt
+import xarray as xr
+import warnings
+import math
+import sys
+import os
+import gc
+from argparse import ArgumentParser
+import fnmatch
+import time as ostime
+#import dask
+warnings.simplefilter("ignore", category=xr.SerializationWarning)
+
+try:
+ from mpi4py import MPI
+except:
+ MPI = None
+
+
+def create_galapagos_fieldset(datahead, periodic_wrap, use_stokes):
+ # dask.config.set({'array.chunk-size': '16MiB'})
+ ddir = os.path.join(datahead,"NEMO-MEDUSA/ORCA0083-N006/")
+ ufiles = sorted(glob(ddir+'means/ORCA0083-N06_20[00-10]*d05U.nc'))
+ vfiles = [u.replace('05U.nc', '05V.nc') for u in ufiles]
+ meshfile = glob(ddir+'domain/coordinates.nc')
+ nemo_files = {'U': {'lon': meshfile, 'lat': meshfile, 'data': ufiles},
+ 'V': {'lon': meshfile, 'lat': meshfile, 'data': vfiles}}
+ nemo_variables = {'U': 'uo', 'V': 'vo'}
+ nemo_dimensions = {'lon': 'glamf', 'lat': 'gphif', 'time': 'time_counter'}
+    period = delta(days=366*11) if periodic_wrap else False # 11-year period
+ extrapolation = False if periodic_wrap else True
+ # ==== Because the stokes data is a different grid, we actually need to define the chunking ==== #
+ # fieldset_nemo = FieldSet.from_nemo(nemofiles, nemovariables, nemodimensions, field_chunksize='auto')
+ nemo_chs = {'time_counter': 1, 'depthu': 75, 'depthv': 75, 'depthw': 75, 'deptht': 75, 'y': 100, 'x': 100}
+ nemo_nchs = {
+ 'U': {'lon': ('x', 64), 'lat': ('y', 32), 'depth': ('depthu', 25), 'time': ('time_counter', 1)}, #
+ 'V': {'lon': ('x', 64), 'lat': ('y', 32), 'depth': ('depthv', 25), 'time': ('time_counter', 1)}, #
+ }
+ try:
+ fieldset_nemo = FieldSet.from_nemo(nemo_files, nemo_variables, nemo_dimensions, field_chunksize=nemo_chs, time_periodic=period, allow_time_extrapolation=extrapolation)
+    except (SyntaxError, TypeError): # fall back to the newer 'chunksize' keyword
+ fieldset_nemo = FieldSet.from_nemo(nemo_files, nemo_variables, nemo_dimensions, chunksize=nemo_nchs, time_periodic=period, allow_time_extrapolation=extrapolation)
+
+    if use_stokes:
+ stokes_files = sorted(glob(datahead+"/WaveWatch3data/CFSR/WW3-*_uss.nc"))
+ stokes_variables = {'U': 'uuss', 'V': 'vuss'}
+ stokes_dimensions = {'lat': 'latitude', 'lon': 'longitude', 'time': 'time'}
+ stokes_chs = {'time': 1, 'latitude': 16, 'longitude': 32}
+ stokes_nchs = {
+ 'U': {'lon': ('longitude', 32), 'lat': ('latitude', 16), 'time': ('time', 1)},
+ 'V': {'lon': ('longitude', 32), 'lat': ('latitude', 16), 'time': ('time', 1)}
+ }
+ stokes_period = delta(days=366+2*31) if periodic_wrap else False # 14 month period
+ fieldset_stokes = None
+ try:
+ fieldset_stokes = FieldSet.from_netcdf(stokes_files, stokes_variables, stokes_dimensions, field_chunksize=stokes_chs, time_periodic=stokes_period, allow_time_extrapolation=extrapolation)
+        except (SyntaxError, TypeError):
+ fieldset_stokes = FieldSet.from_netcdf(stokes_files, stokes_variables, stokes_dimensions, chunksize=stokes_nchs, time_periodic=stokes_period, allow_time_extrapolation=extrapolation)
+ fieldset_stokes.add_periodic_halo(zonal=True, meridional=False, halosize=5)
+
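+        # Combine NEMO surface currents and Stokes drift: adding the two Field objects makes
+        # Parcels evaluate them as a summed field during interpolation.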
+ fieldset = FieldSet(U=fieldset_nemo.U+fieldset_stokes.U, V=fieldset_nemo.V+fieldset_stokes.V)
+ fU = fieldset.U[0]
+ else:
+ fieldset = fieldset_nemo
+ fU = fieldset.U
+
+ return fieldset, fU
+
+
+def perIterGC():
+ gc.collect()
+
+
+class GalapagosParticle(JITParticle):
+ age = Variable('age', initial=0.)
+ life_expectancy = Variable('life_expectancy', dtype=np.float64, initial=14.0*86400.0) # np.finfo(np.float64).max
+
+
+def Age(particle, fieldset, time):
+ if particle.state == StateCode.Evaluate:
+ particle.age = particle.age + math.fabs(particle.dt)
+ if particle.age > particle.life_expectancy:
+ particle.delete()
+
+
+def DeleteParticle(particle, fieldset, time):
+ particle.delete()
+
+
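+# Wrap particles back into the [-91.8, -89] x [-1.4, 0.7] degrees box (the 'galapagos_extent'
+# release region defined in the main script).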
+def periodicBC(particle, fieldSet, time):
+ dlon = -89.0 + 91.8
+ dlat = 0.7 + 1.4
+ if particle.lon > -89.0:
+ particle.lon -= dlon
+ if particle.lon < -91.8:
+ particle.lon += dlon
+ if particle.lat > 0.7:
+ particle.lat -= dlat
+ if particle.lat < -1.4:
+ particle.lat += dlat
+
+
+if __name__=='__main__':
+    parser = ArgumentParser(description="Benchmark of backward particle advection around the Galapagos Islands (NEMO-MEDUSA data)")
+ parser.add_argument("-s", "--stokes", dest="stokes", action='store_true', default=False, help="use Stokes' field data")
+ parser.add_argument("-i", "--imageFileName", dest="imageFileName", type=str, default="mpiChunking_plot_MPI.png", help="image file name of the plot")
+ parser.add_argument("-p", "--periodic", dest="periodic", action='store_true', default=False, help="enable/disable periodic wrapping (else: extrapolation)")
+ parser.add_argument("-w", "--writeout", dest="write_out", action='store_true', default=False, help="write data in outfile")
+ # parser.add_argument("-t", "--time_in_days", dest="time_in_days", type=int, default=365, help="runtime in days (default: 365)")
+ parser.add_argument("-t", "--time_in_days", dest="time_in_days", type=str, default="1*365", help="runtime in days (default: 1*365)")
+ parser.add_argument("-G", "--GC", dest="useGC", action='store_true', default=False, help="using a garbage collector (default: false)")
+ args = parser.parse_args()
+
+ wstokes = args.stokes
+ imageFileName=args.imageFileName
+ periodicFlag=args.periodic
+ time_in_days = int(float(eval(args.time_in_days)))
+ with_GC = args.useGC
+
+ branch = "soa_benchmark"
+ computer_env = "local/unspecified"
+ scenario = "galapagos"
+ headdir = ""
+ odir = ""
+ datahead = ""
+ dirread_top = ""
+ dirread_top_bgc = ""
+ dirread_mesh = ""
+ if os.uname()[1] in ['science-bs35', 'science-bs36']: # Gemini
+ # headdir = "/scratch/{}/experiments/palaeo-parcels".format(os.environ['USER'])
+ headdir = "/scratch/{}/experiments/galapagos".format("ckehl")
+ odir = os.path.join(headdir,"BENCHres")
+ datahead = "/data/oceanparcels/input_data"
+ ddir_head = os.path.join(datahead, 'NEMO-MEDUSA/ORCA0083-N006/')
+ computer_env = "Gemini"
+ elif fnmatch.fnmatchcase(os.uname()[1], "*.bullx*"): # Cartesius
+ CARTESIUS_SCRATCH_USERNAME = 'ckehluu'
+ headdir = "/scratch/shared/{}/experiments/galapagos".format(CARTESIUS_SCRATCH_USERNAME)
+ odir = os.path.join(headdir, "BENCHres")
+ datahead = "/projects/0/topios/hydrodynamic_data"
+ ddir_head = os.path.join(datahead, 'NEMO-MEDUSA/ORCA0083-N006/')
+ computer_env = "Cartesius"
+ else:
+ headdir = "/var/scratch/galapagos"
+ odir = os.path.join(headdir, "BENCHres")
+ datahead = "/data"
+ ddir_head = os.path.join(datahead, 'NEMO-MEDUSA/ORCA0083-N006/')
+ print("running {} on {} (uname: {}) - branch '{}' - argv: {}".format(scenario, computer_env, os.uname()[1], branch, sys.argv[1:]))
+
+ fieldset, fU = create_galapagos_fieldset(datahead, True, wstokes)
+ fname = os.path.join(odir,"galapagosparticles_bwd_wstokes_v2.nc") if wstokes else os.path.join(odir,"galapagosparticles_bwd_v2.nc")
+
+ galapagos_extent = [-91.8, -89, -1.4, 0.7]
+ startlon, startlat = np.meshgrid(np.arange(galapagos_extent[0], galapagos_extent[1], 0.2),
+ np.arange(galapagos_extent[2], galapagos_extent[3], 0.2))
+
+ print("|lon| = {}; |lat| = {}".format(startlon.shape[0], startlat.shape[0]))
+
+ pset = ParticleSet_Benchmark(fieldset=fieldset, pclass=GalapagosParticle, lon=startlon, lat=startlat, time=fU.grid.time[-1], repeatdt=delta(days=7))
+ """ Kernal + Execution"""
+ postProcessFuncs = []
+ if with_GC:
+ postProcessFuncs.append(perIterGC)
+ output_fpath = None
+ outfile = None
+ if args.write_out:
+ output_fpath = fname
+ outfile = pset.ParticleFile(name=output_fpath, outputdt=delta(days=1))
+ kernel = pset.Kernel(AdvectionRK4)+pset.Kernel(Age)+pset.Kernel(periodicBC)
+
+ starttime = 0
+ endtime = 0
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ # global_t_0 = ostime.time()
+ # starttime = MPI.Wtime()
+ starttime = ostime.process_time()
+ else:
+ #starttime = ostime.time()
+ starttime = ostime.process_time()
+
+ pset.execute(kernel, dt=delta(hours=-1), output_file=outfile, recovery={ErrorCode.ErrorOutOfBounds: DeleteParticle}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(days=1))
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ # global_t_0 = ostime.time()
+ # endtime = MPI.Wtime()
+ endtime = ostime.process_time()
+ else:
+ # endtime = ostime.time()
+ endtime = ostime.process_time()
+
+ if args.write_out:
+ outfile.close()
+
+ size_Npart = len(pset.nparticle_log)
+ Npart = pset.nparticle_log.get_param(size_Npart-1)
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ Npart = mpi_comm.reduce(Npart, op=MPI.SUM, root=0)
+ if mpi_comm.Get_rank() == 0:
+ if size_Npart>0:
+ sys.stdout.write("final # particles: {}\n".format( Npart ))
+ sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime-starttime))
+ avg_time = np.mean(np.array(pset.total_log.get_values(), dtype=np.float64))
+ sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time*1000.0))
+ else:
+ if size_Npart > 0:
+ sys.stdout.write("final # particles: {}\n".format( Npart ))
+ sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime - starttime))
+ avg_time = np.mean(np.array(pset.total_log.get_values(), dtype=np.float64))
+ sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time * 1000.0))
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ # mpi_comm.Barrier()
+ Nparticles = mpi_comm.reduce(np.array(pset.nparticle_log.get_params()), op=MPI.SUM, root=0)
+ Nmem = mpi_comm.reduce(np.array(pset.mem_log.get_params()), op=MPI.SUM, root=0)
+ if mpi_comm.Get_rank() == 0:
+ pset.plot_and_log(memory_used=Nmem, nparticles=Nparticles, target_N=1, imageFilePath=imageFileName, odir=odir)
+ else:
+ pset.plot_and_log(target_N=1, imageFilePath=imageFileName, odir=odir)
+
+ print('Execution finished')
diff --git a/performance/benchmark_palaeo_Y2K.py b/performance/benchmark_palaeo_Y2K.py
new file mode 100644
index 0000000000..9026458740
--- /dev/null
+++ b/performance/benchmark_palaeo_Y2K.py
@@ -0,0 +1,453 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Oct 13 15:31:22 2017
+
+@author: nooteboom
+"""
+import itertools
+
+from parcels import (FieldSet, JITParticle, AdvectionRK4_3D,
+ Field, ParticleFile, Variable, StateCode, OperationCode, ErrorCode)
+# from parcels import ParticleSet
+from parcels.particleset_benchmark import ParticleSet_Benchmark
+
+from argparse import ArgumentParser
+from datetime import timedelta as delta
+from datetime import datetime
+import numpy as np
+import math
+from glob import glob
+import sys
+import pandas as pd
+
+import os
+import time as ostime
+import fnmatch
+
+# import dask
+import gc
+
+try:
+ from mpi4py import MPI
+except:
+ MPI = None
+
+import warnings
+import xarray as xr
+warnings.simplefilter("ignore", category=xr.SerializationWarning)
+
+global_t_0 = 0
+odir = ""
+
+
+def set_nemo_fieldset(ufiles, vfiles, wfiles, tfiles, pfiles, dfiles, ifiles, bfile, mesh_mask='/scratch/ckehl/experiments/palaeo-parcels/NEMOdata/domain/coordinates.nc', periodicFlag=False):
+ bfile_array = bfile
+ if not isinstance(bfile_array, list):
+ bfile_array = [bfile, ]
+ filenames = {'U': {'lon': mesh_mask,
+ 'lat': mesh_mask,
+ 'depth': [ufiles[0]],
+ 'data': ufiles},
+ 'V': {'lon': mesh_mask,
+ 'lat': mesh_mask,
+ 'depth': [ufiles[0]],
+ 'data': vfiles},
+ 'W': {'lon': mesh_mask,
+ 'lat': mesh_mask,
+ 'depth': [ufiles[0]],
+ 'data': wfiles},
+ 'S': {'lon': mesh_mask,
+ 'lat': mesh_mask,
+ 'data': tfiles},
+ 'T': {'lon': mesh_mask,
+ 'lat': mesh_mask,
+ 'data': tfiles},
+ 'NO3': {'lon': mesh_mask,
+ 'lat': mesh_mask,
+ 'depth': [ufiles[0]],
+ # 'depth': [pfiles[0]],
+ 'data': pfiles},
+ 'ICE': {'lon': mesh_mask,
+ 'lat': mesh_mask,
+ 'data': ifiles},
+ 'ICEPRES': {'lon': mesh_mask,
+ 'lat': mesh_mask,
+ 'data': ifiles},
+ 'CO2': {'lon': mesh_mask,
+ 'lat': mesh_mask,
+ 'data': dfiles},
+ 'PP': {'lon': mesh_mask,
+ 'lat': mesh_mask,
+ 'depth': [ufiles[0]],
+ # 'depth': [dfiles[0]],
+ 'data': dfiles},
+ }
+ if mesh_mask:
+ filenames['mesh_mask'] = mesh_mask
+ variables = {'U': 'uo', # 3D
+ 'V': 'vo', # 3D
+ 'W': 'wo', # 3D
+ 'S': 'sss', # 2D - 'salin' = 3D
+ 'T': 'sst', # 2D - 'potemp' = 3D
+ # 'O2': 'OXY',
+ 'NO3': 'DIN', # 3D
+ 'ICE': 'sit', # 2D
+ 'ICEPRES': 'ice_pres', # 2D
+ 'CO2': 'TCO2', # 2D
+ 'PP': 'TPP3', # 3D
+ # 'B': 'Bathymetry',
+ }
+
+ dimensions = {'U': {'lon': 'glamf', 'lat': 'gphif', 'depth': 'depthu', 'time': 'time_counter'}, #
+ 'V': {'lon': 'glamf', 'lat': 'gphif', 'depth': 'depthu', 'time': 'time_counter'}, #
+ 'W': {'lon': 'glamf', 'lat': 'gphif', 'depth': 'depthu', 'time': 'time_counter'}, #
+ 'T': {'lon': 'glamf', 'lat': 'gphif', 'time': 'time_counter'},
+ 'S': {'lon': 'glamf', 'lat': 'gphif', 'time': 'time_counter'},
+ 'NO3': {'lon': 'glamf', 'lat': 'gphif', 'depth': 'depthu', 'time': 'time_counter'},
+ 'ICE': {'lon': 'glamf', 'lat': 'gphif', 'time': 'time_counter'},
+ 'ICEPRES': {'lon': 'glamf', 'lat': 'gphif', 'time': 'time_counter'},
+ 'CO2': {'lon': 'glamf', 'lat': 'gphif', 'time': 'time_counter'},
+ 'PP': {'lon': 'glamf', 'lat': 'gphif', 'depth': 'depthu', 'time': 'time_counter'},
+ }
+ bfiles = {'lon': mesh_mask, 'lat': mesh_mask, 'data': bfile_array}
+ bvariables = ('B', 'Bathymetry')
+ bdimensions = {'lon': 'glamf', 'lat': 'gphif'}
+ bchs = False
+
+ chs = {'time_counter': 1, 'depthu': 80, 'depthv': 80, 'depthw': 80, 'deptht': 80, 'y': 200, 'x': 200}
+ nchs = {
+ 'U': {'lon': ('x', 128), 'lat': ('y', 96), 'depth': ('depthu', 25), 'time': ('time_counter', 1)},
+ 'V': {'lon': ('x', 128), 'lat': ('y', 96), 'depth': ('depthv', 25), 'time': ('time_counter', 1)},
+ 'W': {'lon': ('x', 128), 'lat': ('y', 96), 'depth': ('depthw', 25), 'time': ('time_counter', 1)},
+ 'T': {'lon': ('x', 128), 'lat': ('y', 96), 'time': ('time_counter', 1)},
+ 'S': {'lon': ('x', 128), 'lat': ('y', 96), 'time': ('time_counter', 1)},
+ 'NO3': {'lon': ('x', 128), 'lat': ('y', 96), 'depth': ('deptht', 25), 'time': ('time_counter', 1)},
+ 'PP': {'lon': ('x', 128), 'lat': ('y', 96), 'time': ('time_counter', 1)},
+ 'ICE': {'lon': ('x', 128), 'lat': ('y', 96), 'time': ('time_counter', 1)},
+ 'ICEPRES': {'lon': ('x', 128), 'lat': ('y', 96), 'time': ('time_counter', 1)},
+ 'CO2': {'lon': ('x', 128), 'lat': ('y', 96), 'depth': ('deptht', 25), 'time': ('time_counter', 1)},
+ }
+ #
+ #chs = (1, 75, 200, 200)
+ #
+ #dask.config.set({'array.chunk-size': '6MiB'})
+ #chs = 'auto'
+
+ if mesh_mask: # and isinstance(bfile, list) and len(bfile) > 0:
+ if not periodicFlag:
+ try:
+ # fieldset = FieldSet.from_nemo(filenames, variables, dimensions, allow_time_extrapolation=False, field_chunksize='auto')
+ fieldset = FieldSet.from_nemo(filenames, variables, dimensions, allow_time_extrapolation=True, field_chunksize=chs)
+ Bfield = Field.from_netcdf(bfiles, bvariables, bdimensions, allow_time_extrapolation=True, interp_method='cgrid_tracer', field_chunksize=bchs)
+            except (SyntaxError, TypeError): # fall back to the newer 'chunksize' keyword
+ fieldset = FieldSet.from_nemo(filenames, variables, dimensions, allow_time_extrapolation=True, chunksize=nchs)
+ Bfield = Field.from_netcdf(bfiles, bvariables, bdimensions, allow_time_extrapolation=True, interp_method='cgrid_tracer', chunksize=bchs)
+ else:
+ try:
+ fieldset = FieldSet.from_nemo(filenames, variables, dimensions, time_periodic=delta(days=366), field_chunksize=chs)
+ Bfield = Field.from_netcdf(bfiles, bvariables, bdimensions, allow_time_extrapolation=True, interp_method='cgrid_tracer', field_chunksize=bchs)
+            except (SyntaxError, TypeError):
+ fieldset = FieldSet.from_nemo(filenames, variables, dimensions, time_periodic=delta(days=366), chunksize=nchs)
+ Bfield = Field.from_netcdf(bfiles, bvariables, bdimensions, allow_time_extrapolation=True, interp_method='cgrid_tracer', chunksize=bchs)
+ fieldset.add_field(Bfield, 'B')
+ fieldset.U.vmax = 10
+ fieldset.V.vmax = 10
+ fieldset.W.vmax = 10
+ return fieldset
+ else:
+ filenames.pop('B')
+ variables.pop('B')
+ dimensions.pop('B')
+ if not periodicFlag:
+ try:
+ # fieldset = FieldSet.from_netcdf(filenames, variables, dimensions, allow_time_extrapolation=False, field_chunksize=chs)
+ fieldset = FieldSet.from_netcdf(filenames, variables, dimensions, allow_time_extrapolation=True, field_chunksize=chs)
+            except (SyntaxError, TypeError):
+ fieldset = FieldSet.from_netcdf(filenames, variables, dimensions, allow_time_extrapolation=True, chunksize=nchs)
+ else:
+ try:
+ fieldset = FieldSet.from_netcdf(filenames, variables, dimensions, time_periodic=delta(days=366), field_chunksize=chs)
+            except (SyntaxError, TypeError):
+ fieldset = FieldSet.from_netcdf(filenames, variables, dimensions, time_periodic=delta(days=366), chunksize=nchs)
+ fieldset.U.vmax = 10
+ fieldset.V.vmax = 10
+ fieldset.W.vmax = 10
+ return fieldset
+
+
+def periodicBC(particle, fieldSet, time):
+ if particle.lon > 180.0:
+ particle.lon -= 360.0
+ if particle.lon < -180.0:
+ particle.lon += 360.0
+
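+# Move the particle vertically at 'sinkspeed' (the sign follows particle.dt) while it is below the
+# dwelling depth; once it reaches the dwelling depth, sample the surface fields at the current time
+# and remove the particle.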
+def Sink(particle, fieldset, time):
+ if(particle.depth>fieldset.dwellingdepth):
+ particle.depth = particle.depth + fieldset.sinkspeed * particle.dt
+ elif(particle.depth<=fieldset.dwellingdepth and particle.depth>1):
+ particle.depth = fieldset.surface
+    # == Comment: sampling at 'time+dt' here is a bad idea because those data are (likely) not loaded yet == #
+ # particle.temp = fieldset.T[time+particle.dt, fieldset.surface, particle.lat, particle.lon]
+ # particle.salin = fieldset.S[time+particle.dt, fieldset.surface, particle.lat, particle.lon]
+ # particle.PP = fieldset.PP[time+particle.dt, fieldset.surface, particle.lat, particle.lon]
+ # particle.NO3 = fieldset.NO3[time+particle.dt, fieldset.surface, particle.lat, particle.lon]
+ # particle.ICE = fieldset.ICE[time+particle.dt, fieldset.surface, particle.lat, particle.lon]
+ # particle.ICEPRES = fieldset.ICEPRES[time+particle.dt, fieldset.surface, particle.lat, particle.lon]
+ # particle.CO2 = fieldset.CO2[time+particle.dt, fieldset.surface, particle.lat, particle.lon]
+ particle.temp = fieldset.T[time, fieldset.surface, particle.lat, particle.lon]
+ particle.salin = fieldset.S[time, fieldset.surface, particle.lat, particle.lon]
+ particle.PP = fieldset.PP[time, fieldset.surface, particle.lat, particle.lon]
+ particle.NO3 = fieldset.NO3[time, fieldset.surface, particle.lat, particle.lon]
+ particle.ICE = fieldset.ICE[time, fieldset.surface, particle.lat, particle.lon]
+ particle.ICEPRES = fieldset.ICEPRES[time, fieldset.surface, particle.lat, particle.lon]
+ particle.CO2 = fieldset.CO2[time, fieldset.surface, particle.lat, particle.lon]
+ particle.delete()
+
+def Age(particle, fieldset, time):
+ if particle.state == StateCode.Evaluate:
+ particle.age = particle.age + math.fabs(particle.dt)
+ # if particle.age > fieldset.maxage:
+ # particle.delete()
+
+def DeleteParticle(particle, fieldset, time):
+ particle.delete()
+
+def perIterGC():
+ gc.collect()
+
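+# On the first kernel call (age == 0) place the particle on the sea floor using the bathymetry
+# field B (clamped to 5799 m) and store its release longitude, latitude and depth.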
+def initials(particle, fieldset, time):
+ if particle.age==0.:
+ particle.depth = fieldset.B[time, fieldset.surface, particle.lat, particle.lon]
+ if(particle.depth > 5800.):
+ particle.age = (particle.depth - 5799.)*fieldset.sinkspeed
+ particle.depth = 5799.
+ particle.lon0 = particle.lon
+ particle.lat0 = particle.lat
+ particle.depth0 = particle.depth
+
+class DinoParticle(JITParticle):
+ temp = Variable('temp', dtype=np.float32, initial=np.nan)
+ age = Variable('age', dtype=np.float32, initial=0.)
+ salin = Variable('salin', dtype=np.float32, initial=np.nan)
+ lon0 = Variable('lon0', dtype=np.float32, initial=0.)
+ lat0 = Variable('lat0', dtype=np.float32, initial=0.)
+ depth0 = Variable('depth0',dtype=np.float32, initial=0.)
+ PP = Variable('PP',dtype=np.float32, initial=np.nan)
+ NO3 = Variable('NO3',dtype=np.float32, initial=np.nan)
+ ICE = Variable('ICE',dtype=np.float32, initial=np.nan)
+ ICEPRES = Variable('ICEPRES',dtype=np.float32, initial=np.nan)
+ CO2 = Variable('CO2',dtype=np.float32, initial=np.nan)
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser(description="Benchmark of the palaeo-parcels sinking-particle scenario with NEMO-MEDUSA data")
+ parser.add_argument("-i", "--imageFileName", dest="imageFileName", type=str, default="mpiChunking_plot_MPI.png", help="image file name of the plot")
+ parser.add_argument("-p", "--periodic", dest="periodic", action='store_true', default=False, help="enable/disable periodic wrapping (else: extrapolation)")
+ parser.add_argument("-sp", "--sinking_speed", dest="sp", type=float, default=11.0, help="set the simulation sinking speed in [m/day] (default: 11.0)")
+ parser.add_argument("-dd", "--dwelling_depth", dest="dd", type=float, default=10.0, help="set the dwelling depth (i.e. ocean surface depth) in [m] (default: 10.0)")
+ parser.add_argument("-w", "--writeout", dest="write_out", action='store_true', default=False, help="write data in outfile")
+ # parser.add_argument("-t", "--time_in_days", dest="time_in_days", type=int, default=365, help="runtime in days (default: 365)")
+ parser.add_argument("-t", "--time_in_days", dest="time_in_days", type=str, default="1*365", help="runtime in days (default: 1*365)")
+ parser.add_argument("-G", "--GC", dest="useGC", action='store_true', default=False, help="using a garbage collector (default: false)")
+ parser.add_argument("-pr", "--profiling", dest="profiling", action='store_true', default=False, help="tells that the profiling of the script is activates")
+ args = parser.parse_args()
+
+ sp = args.sp # The sinkspeed m/day
+ dd = args.dd # The dwelling depth
+ imageFileName=args.imageFileName
+ periodicFlag=args.periodic
+ time_in_days = int(float(eval(args.time_in_days)))
+ time_in_years = int(time_in_days/366.0)
+ with_GC = args.useGC
+
+ branch = "soa_benchmark"
+ computer_env = "local/unspecified"
+ scenario = "palaeo-parcels"
+ headdir = ""
+ odir = ""
+ dirread_pal = ""
+ datahead = ""
+ dirread_top = ""
+ dirread_top_bgc = ""
+ if os.uname()[1] in ['science-bs35', 'science-bs36']: # Gemini
+ # headdir = "/scratch/{}/experiments/palaeo-parcels".format(os.environ['USER'])
+ headdir = "/scratch/{}/experiments/palaeo-parcels".format("ckehl")
+ odir = os.path.join(headdir,"BENCHres")
+ dirread_pal = os.path.join(headdir,'NEMOdata')
+ datahead = "/data/oceanparcels/input_data"
+ dirread_top = os.path.join(datahead, 'NEMO-MEDUSA/ORCA0083-N006/')
+ dirread_top_bgc = os.path.join(datahead, 'NEMO-MEDUSA/ORCA0083-N006/')
+ computer_env = "Gemini"
+ elif fnmatch.fnmatchcase(os.uname()[1], "*.bullx*"): # Cartesius
+ CARTESIUS_SCRATCH_USERNAME = 'ckehluu'
+ headdir = "/scratch/shared/{}/experiments/palaeo-parcels".format(CARTESIUS_SCRATCH_USERNAME)
+ odir = os.path.join(headdir, "BENCHres")
+ dirread_pal = os.path.join(headdir,'NEMOdata')
+ datahead = "/projects/0/topios/hydrodynamic_data"
+ dirread_top = os.path.join(datahead, 'NEMO-MEDUSA/ORCA0083-N006/')
+ dirread_top_bgc = os.path.join(datahead, 'NEMO-MEDUSA_BGC/ORCA0083-N006/')
+ computer_env = "Cartesius"
+ else:
+ headdir = "/var/scratch/nooteboom"
+ odir = os.path.join(headdir, "BENCHres")
+ dirread_pal = headdir
+ datahead = "/data"
+ dirread_top = os.path.join(datahead, 'NEMO-MEDUSA/ORCA0083-N006/')
+ dirread_top_bgc = os.path.join(datahead, 'NEMO-MEDUSA/ORCA0083-N006/')
+
+ # print("running {} on {} (uname: {}) - branch '{}' - headdir: {}; odir: {} - argv: {}".format(scenario, computer_env, os.uname()[1], branch, headdir, odir, sys.argv[1:]))
+ # dirread_pal = '/projects/0/palaeo-parcels/NEMOdata/'
+
+ outfile = 'grid_dd' + str(int(dd))
+ outfile += '_sp' + str(int(sp))
+ if periodicFlag:
+ outfile += '_p'
+ if time_in_years != 1:
+ outfile += '_' + str(time_in_years) + 'y'
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_size = mpi_comm.Get_size()
+ outfile += '_n' + str(mpi_size)
+ if args.profiling:
+ outfile += '_prof'
+ if with_GC:
+ outfile += '_wGC'
+ else:
+ outfile += '_woGC'
+ dirwrite = os.path.join(odir, "sp%d_dd%d" % (int(sp),int(dd)))
+ if not os.path.exists(dirwrite):
+ os.mkdir(dirwrite)
+
+ latlonstruct = pd.read_csv(os.path.join(headdir,"TF_locationsSurfaceSamples_forPeter.csv"))
+ latsz = np.array(latlonstruct.Latitude.tolist())
+ lonsz = np.array(latlonstruct.Longitude.tolist())
+ numlocs = np.logical_and(latsz<1000, lonsz<1000)  # boolean mask of entries below the 1000.0 placeholder threshold
+ latsz = latsz[numlocs]
+ lonsz = lonsz[numlocs]
+
+ assert ~(np.isnan(latsz)).any(), 'locations should not contain any NaN values'
+ dep = dd * np.ones(latsz.shape)
+
+ timesz = np.array([datetime(2000, 12, 25) - delta(days=x) for x in range(0,int(365),3)])
+ times = np.empty(shape=(0))
+ depths = np.empty(shape=(0))
+ lons = np.empty(shape=(0))
+ lats = np.empty(shape=(0))
+ for i in range(len(timesz)):
+ lons = np.append(lons,lonsz)
+ lats = np.append(lats, latsz)
+ depths = np.append(depths, np.zeros(len(lonsz), dtype=np.float32))
+ times = np.append(times, np.full(len(lonsz),timesz[i]))
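+ # Illustrative count (not used by the code): range(0, 365, 3) gives 122 release dates,
+ # so the target particle number is 122 times the number of valid CSV locations, i.e.
+ # every location is re-released every third day over one simulated year.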
+
+ print("running {} on {} (uname: {}) - branch '{}' - (target) N: {} - argv: {}".format(scenario, computer_env, os.uname()[1], branch, lons.shape[0], sys.argv[1:]))
+
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ # global_t_0 = ostime.time()
+ # global_t_0 = MPI.Wtime()
+ global_t_0 = ostime.process_time()
+ else:
+ # global_t_0 = ostime.time()
+ global_t_0 = ostime.process_time()
+
+ ufiles = sorted(glob(dirread_top + 'means/ORCA0083-N06_2000????d05U.nc'))
+ vfiles = sorted(glob(dirread_top + 'means/ORCA0083-N06_2000????d05V.nc'))
+ wfiles = sorted(glob(dirread_top + 'means/ORCA0083-N06_2000????d05W.nc'))
+ tfiles = sorted(glob(dirread_top + 'means/ORCA0083-N06_2000????d05T.nc'))
+ pfiles = sorted(glob(dirread_top_bgc + 'means/ORCA0083-N06_2000????d05P.nc'))
+ dfiles = sorted(glob(dirread_top_bgc + 'means/ORCA0083-N06_2000????d05D.nc'))
+ ifiles = sorted(glob(dirread_top + 'means/ORCA0083-N06_2000????d05I.nc'))
+ bfile = dirread_top + 'domain/bathymetry_ORCA12_V3.3.nc'
+
+ fieldset = set_nemo_fieldset(ufiles, vfiles, wfiles, tfiles, pfiles, dfiles, ifiles, bfile, os.path.join(dirread_pal, "domain/coordinates.nc"), periodicFlag=periodicFlag)
+ fieldset.add_periodic_halo(zonal=True)
+ fieldset.add_constant('dwellingdepth', float(dd))
+ fieldset.add_constant('sinkspeed', sp/86400.)
+ fieldset.add_constant('maxage', 300000.*86400)
+ fieldset.add_constant('surface', 2.5)
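+ # Unit note (illustration only): 'sinkspeed' converts the [m/day] input to [m/s];
+ # with the default sp = 11.0 this is 11.0 / 86400 ~= 1.27e-4 m/s, and 'maxage'
+ # corresponds to 300000 days expressed in seconds.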
+
+ print("|lon| = {}; |lat| = {}; |depth| = {}, |times| = {}, |grids| = {}".format(lonsz.shape[0], latsz.shape[0], dep.shape[0], times.shape[0], fieldset.gridset.size))
+
+ # ==== Set min/max depths in the fieldset ==== #
+ fs_depths = fieldset.U.depth
+
+ pset = ParticleSet_Benchmark.from_list(fieldset=fieldset, pclass=DinoParticle, lon=lons.tolist(), lat=lats.tolist(), depth=depths.tolist(), time=times)
+
+ """ Kernal + Execution"""
+ postProcessFuncs = []
+ if with_GC:
+ postProcessFuncs.append(perIterGC)
+ output_fpath = None
+ if args.write_out:
+ output_fpath = os.path.join(dirwrite, outfile)
+ pfile = pset.ParticleFile(output_fpath, convert_at_end=True, write_ondelete=True)
+ kernels = pset.Kernel(initials) + Sink + Age + pset.Kernel(AdvectionRK4_3D) + Age
+
+ starttime = 0
+ endtime = 0
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ # starttime = ostime.time()
+ # starttime = MPI.Wtime()
+ starttime = ostime.process_time()
+ else:
+ # starttime = ostime.time()
+ starttime = ostime.process_time()
+
+ # pset.execute(kernels, runtime=delta(days=365*9), dt=delta(minutes=-20), output_file=pfile, verbose_progress=False,
+ # recovery={ErrorCode.ErrorOutOfBounds: DeleteParticle}, postIterationCallbacks=postProcessFuncs)
+ # postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12)
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(hours=-12), output_file=pfile, verbose_progress=False, recovery={ErrorCode.ErrorOutOfBounds: DeleteParticle}, postIterationCallbacks=postProcessFuncs, callbackdt=np.infty)
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ # endtime = ostime.time()
+ # endtime = MPI.Wtime()
+ endtime = ostime.process_time()
+ else:
+ #endtime = ostime.time()
+ endtime = ostime.process_time()
+
+ if args.write_out:
+ pfile.close()
+
+ size_Npart = len(pset.nparticle_log)
+ Npart = pset.nparticle_log.get_param(size_Npart-1)
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ Npart = mpi_comm.reduce(Npart, op=MPI.SUM, root=0)
+ if mpi_comm.Get_rank() == 0:
+ if size_Npart>0:
+ sys.stdout.write("final # particles: {}\n".format( Npart ))
+ sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime-starttime))
+ avg_time = np.mean(np.array(pset.total_log.get_values(), dtype=np.float64))
+ sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time*1000.0))
+ else:
+ if size_Npart > 0:
+ sys.stdout.write("final # particles: {}\n".format( Npart ))
+ sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime - starttime))
+ avg_time = np.mean(np.array(pset.total_log.get_values(), dtype=np.float64))
+ sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time * 1000.0))
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ # mpi_comm.Barrier()
+ Nparticles = mpi_comm.reduce(np.array(pset.nparticle_log.get_params()), op=MPI.SUM, root=0)
+ Nmem = mpi_comm.reduce(np.array(pset.mem_log.get_params()), op=MPI.SUM, root=0)
+ if mpi_comm.Get_rank() == 0:
+ pset.plot_and_log(memory_used=Nmem, nparticles=Nparticles, target_N=1, imageFilePath=imageFileName, odir=odir)
+ else:
+ pset.plot_and_log(target_N=1, imageFilePath=imageFileName, odir=odir)
+
+ print('Execution finished')
+
+
+
+
diff --git a/performance/benchmark_perlin.py b/performance/benchmark_perlin.py
new file mode 100644
index 0000000000..ba216f3813
--- /dev/null
+++ b/performance/benchmark_perlin.py
@@ -0,0 +1,461 @@
+"""
+Author: Dr. Christian Kehl
+Date: 11-02-2020
+"""
+
+from parcels import AdvectionEE, AdvectionRK45, AdvectionRK4
+from parcels import FieldSet, ScipyParticle, JITParticle, Variable, AdvectionRK4, StateCode, OperationCode, ErrorCode
+from parcels.particleset_benchmark import ParticleSet_Benchmark as BenchmarkParticleSet
+from parcels.particleset import ParticleSet as DryParticleSet
+from parcels.field import Field, VectorField, NestedField, SummedField
+# from parcels import plotTrajectoriesFile_loadedField
+# from parcels import rng as random
+from parcels import ParcelsRandom
+from datetime import timedelta as delta
+import math
+from argparse import ArgumentParser
+import datetime
+import numpy as np
+import xarray as xr
+import fnmatch
+import sys
+import gc
+import os
+import time as ostime
+# import matplotlib.pyplot as plt
+from parcels.tools import perlin3d
+from parcels.tools import perlin2d
+
+try:
+ from mpi4py import MPI
+except ImportError:
+ MPI = None
+with_GC = False
+
+pset = None
+ptype = {'scipy': ScipyParticle, 'jit': JITParticle}
+method = {'RK4': AdvectionRK4, 'EE': AdvectionEE, 'RK45': AdvectionRK45}
+global_t_0 = 0
+Nparticle = int(math.pow(2,10)) # equals to Nparticle = 1024
+#Nparticle = int(math.pow(2,11)) # equals to Nparticle = 2048
+#Nparticle = int(math.pow(2,12)) # equals to Nparticle = 4096
+#Nparticle = int(math.pow(2,13)) # equals to Nparticle = 8192
+#Nparticle = int(math.pow(2,14)) # equals to Nparticle = 16384
+#Nparticle = int(math.pow(2,15)) # equals to Nparticle = 32768
+#Nparticle = int(math.pow(2,16)) # equals to Nparticle = 65536
+#Nparticle = int(math.pow(2,17)) # equals to Nparticle = 131072
+#Nparticle = int(math.pow(2,18)) # equals to Nparticle = 262144
+#Nparticle = int(math.pow(2,19)) # equals to Nparticle = 524288
+
+noctaves=3
+#noctaves=4 # formerly
+perlinres=(1,24,12) # (1,32,8)
+shapescale=(4,4,4) # (4,8,8)
+#shapescale=(8,6,6) # formerly
+perlin_persistence=0.6
+img_shape = (int(math.pow(2,noctaves))*perlinres[1]*shapescale[1], int(math.pow(2,noctaves))*perlinres[2]*shapescale[2])
+sx = img_shape[0]/1000.0
+sy = img_shape[1]/1000.0
+a = (10.0 * img_shape[0])
+b = (10.0 * img_shape[1])
+tsteps = 61
+tscale = 6
+scalefac = (40.0 / (1000.0/60.0)) # 40 km/h
+scalefac /= 1000.0
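+# Illustrative trace of the constants above: 2**noctaves = 8, so
+# img_shape = (8*24*4, 8*12*4) = (768, 384) grid cells, and
+# scalefac = (40.0 / (1000.0/60.0)) / 1000.0 = 0.0024.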
+
+# Idea for 4D: perlin3D creates a time-consistent 3D field
+# Thus, we can use skimage to create shifted/rotated/morphed versions
+# for the depth domain, so that perlin4D = [depth][time][lat][lon].
+# then, we can do a transpose-op in numpy, to get [time][depth][lat][lon]
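+# A minimal sketch of the transpose step described above (illustration only, not used
+# by this benchmark): given a hypothetical stack of per-depth Perlin fields laid out
+# as [depth][time][lat][lon], numpy reorders the axes to the [time][depth][lat][lon]
+# layout without copying the data.
+def _reorder_perlin4d(perlin4d):
+    """Return a [time][depth][lat][lon] view of a [depth][time][lat][lon] array."""
+    return np.transpose(perlin4d, (1, 0, 2, 3))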
+
+# We need to modify the kernel.execute / pset.execute so that it returns from the JIT
+# in a given time WITHOUT writing to disk via outfile => introduce a pyloop_dt
+
+def DeleteParticle(particle, fieldset, time):
+ particle.delete()
+
+def RenewParticle(particle, fieldset, time):
+ particle.lat = np.random.rand() * a
+ particle.lon = np.random.rand() * b
+
+def perIterGC():
+ gc.collect()
+
+def perlin_fieldset_from_numpy(periodic_wrap=False, write_out=False):
+ """Simulate a current from structured random noise (i.e. Perlin noise).
+ We use the external package 'perlin-numpy' as field generator, see:
+ https://github.com/pvigier/perlin-numpy
+
+ Perlin noise was introduced in:
+ Perlin, Ken (July 1985). "An Image Synthesizer". SIGGRAPH Comput. Graph. 19 (0097-8930), pp. 287–296.
+ doi:10.1145/325165.325247, https://dl.acm.org/doi/10.1145/325334.325247
+
+ :param write_out: False if no write-out; else the fieldset path+basename
+ """
+
+ # Coordinates of the test fieldset (on A-grid in deg)
+ lon = np.linspace(-a*0.5, a*0.5, img_shape[0], dtype=np.float32)
+ sys.stdout.write("lon field: {}\n".format(lon.size))
+ lat = np.linspace(-b*0.5, b*0.5, img_shape[1], dtype=np.float32)
+ sys.stdout.write("lat field: {}\n".format(lat.size))
+ totime = tsteps*tscale*24.0*60.0*60.0
+ time = np.linspace(0., totime, tsteps, dtype=np.float64)
+ sys.stdout.write("time field: {}\n".format(time.size))
+
+ # Define arrays U (zonal), V (meridional)
+ U = perlin2d.generate_fractal_noise_temporal2d(img_shape, tsteps, (perlinres[1], perlinres[2]), noctaves, perlin_persistence, max_shift=((-1, 2), (-1, 2)))
+ U = np.transpose(U, (0,2,1))
+ # U = np.swapaxes(U, 1, 2)
+ # print("U-statistics - min: {:10.7f}; max: {:10.7f}; avg. {:10.7f}; std_dev: {:10.7f}".format(U.min(initial=0), U.max(initial=0), U.mean(), U.std()))
+ V = perlin2d.generate_fractal_noise_temporal2d(img_shape, tsteps, (perlinres[1], perlinres[2]), noctaves, perlin_persistence, max_shift=((-1, 2), (-1, 2)))
+ V = np.transpose(V, (0,2,1))
+ # V = np.swapaxes(V, 1, 2)
+ # print("V-statistics - min: {:10.7f}; max: {:10.7f}; avg. {:10.7f}; std_dev: {:10.7f}".format(V.min(initial=0), V.max(initial=0), V.mean(), V.std()))
+
+ # U = perlin3d.generate_fractal_noise_3d(img_shape, perlinres, noctaves, perlin_persistence) * scalefac
+ # U = np.transpose(U, (0,2,1))
+ # sys.stdout.write("U field shape: {} - [tdim][ydim][xdim]=[{}][{}][{}]\n".format(U.shape, time.shape[0], lat.shape[0], lon.shape[0]))
+ # V = perlin3d.generate_fractal_noise_3d(img_shape, perlinres, noctaves, perlin_persistence) * scalefac
+ # V = np.transpose(V, (0,2,1))
+ # sys.stdout.write("V field shape: {} - [tdim][ydim][xdim]=[{}][{}][{}]\n".format(V.shape, time.shape[0], lat.shape[0], lon.shape[0]))
+
+ U *= scalefac
+ V *= scalefac
+
+ data = {'U': U, 'V': V}
+ dimensions = {'time': time, 'lon': lon, 'lat': lat}
+ fieldset = None
+ if periodic_wrap:
+ fieldset = FieldSet.from_data(data, dimensions, mesh='flat', transpose=False, time_periodic=delta(days=366))
+ else:
+ fieldset = FieldSet.from_data(data, dimensions, mesh='flat', transpose=False, allow_time_extrapolation=True)
+ if write_out:
+ fieldset.write(filename=write_out)
+ return fieldset
+
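+# Example call (hypothetical, for illustration only): build a periodically wrapping
+# Perlin fieldset and write it to disk under a chosen basename:
+#   fieldset = perlin_fieldset_from_numpy(periodic_wrap=True, write_out="/tmp/perlin")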
+
+def perlin_fieldset_from_xarray(periodic_wrap=False):
+ """Simulate a current from structured random noise (i.e. Perlin noise).
+ We use the external package 'perlin-numpy' as field generator, see:
+ https://github.com/pvigier/perlin-numpy
+
+ Perlin noise was introduced in:
+ Perlin, Ken (July 1985). "An Image Synthesizer". SIGGRAPH Comput. Graph. 19 (0097-8930), pp. 287–296.
+ doi:10.1145/325165.325247, https://dl.acm.org/doi/10.1145/325334.325247
+ """
+ img_shape = (perlinres[0]*shapescale[0], int(math.pow(2,noctaves))*perlinres[1]*shapescale[1], int(math.pow(2,noctaves))*perlinres[2]*shapescale[2])
+
+ # Coordinates of the test fieldset (on A-grid in deg)
+ lon = np.linspace(0, a, img_shape[1], dtype=np.float32)
+ lat = np.linspace(0, b, img_shape[2], dtype=np.float32)
+ totime = img_shape[0] * 24.0 * 60.0 * 60.0
+ time = np.linspace(0., totime, img_shape[0], dtype=np.float32)
+
+ # Define arrays U (zonal), V (meridional), W (vertical) and P (sea
+ # surface height) all on A-grid
+ U = perlin3d.generate_fractal_noise_3d(img_shape, perlinres, noctaves, perlin_persistence) * scalefac
+ U = np.transpose(U, (0,2,1))
+ V = perlin3d.generate_fractal_noise_3d(img_shape, perlinres, noctaves, perlin_persistence) * scalefac
+ V = np.transpose(V, (0,2,1))
+ #P = perlin3d.generate_fractal_noise_3d(img_shape, perlinres, noctaves, perlin_persistence) * scalefac
+
+ dimensions = {'time': time, 'lon': lon, 'lat': lat}
+ dims = ('time', 'lat', 'lon')
+ data = {'Uxr': xr.DataArray(U, coords=dimensions, dims=dims),
+ 'Vxr': xr.DataArray(V, coords=dimensions, dims=dims)} #,'Pxr': xr.DataArray(P, coords=dimensions, dims=dims)
+ ds = xr.Dataset(data)
+
+ variables = {'U': 'Uxr', 'V': 'Vxr'}
+ dimensions = {'time': 'time', 'lat': 'lat', 'lon': 'lon'}
+ if periodic_wrap:
+ return FieldSet.from_xarray_dataset(ds, variables, dimensions, mesh='flat', time_periodic=delta(days=366))
+ else:
+ return FieldSet.from_xarray_dataset(ds, variables, dimensions, mesh='flat', allow_time_extrapolation=True)
+
+class AgeParticle_JIT(JITParticle):
+ age = Variable('age', dtype=np.float64, initial=0.0)
+ life_expectancy = Variable('life_expectancy', dtype=np.float64, initial=np.finfo(np.float64).max)
+ initialized_dynamic = Variable('initialized_dynamic', dtype=np.int32, initial=0)
+
+class AgeParticle_SciPy(ScipyParticle):
+ age = Variable('age', dtype=np.float64, initial=0.0)
+ life_expectancy = Variable('life_expectancy', dtype=np.float64, initial=np.finfo(np.float64).max)
+ initialized_dynamic = Variable('initialized_dynamic', dtype=np.int32, initial=0)
+
+def initialize(particle, fieldset, time):
+ if particle.initialized_dynamic < 1:
+ np_scaler = math.sqrt(3.0 / 2.0)
+ particle.life_expectancy = time + ParcelsRandom.uniform(.0, (fieldset.life_expectancy-time) * 2.0 / np_scaler)
+ # particle.life_expectancy = time + ParcelsRandom.uniform(.0, (fieldset.life_expectancy-time)*math.sqrt(3.0 / 2.0))
+ # particle.life_expectancy = time + ParcelsRandom.uniform(.0, fieldset.life_expectancy) * math.sqrt(3.0 / 2.0)
+ # particle.life_expectancy = time+ParcelsRandom.uniform(.0, fieldset.life_expectancy) * ((3.0/2.0)**2.0)
+ particle.initialized_dynamic = 1
+
+def Age(particle, fieldset, time):
+ if particle.state == StateCode.Evaluate:
+ particle.age = particle.age + math.fabs(particle.dt)
+ if particle.age > particle.life_expectancy:
+ particle.delete()
+
+age_ptype = {'scipy': AgeParticle_SciPy, 'jit': AgeParticle_JIT}
+
+if __name__=='__main__':
+ parser = ArgumentParser(description="Example of particle advection using in-memory stommel test case")
+ parser.add_argument("-i", "--imageFileName", dest="imageFileName", type=str, default="mpiChunking_plot_MPI.png", help="image file name of the plot")
+ parser.add_argument("-b", "--backwards", dest="backwards", action='store_true', default=False, help="enable/disable running the simulation backwards")
+ parser.add_argument("-p", "--periodic", dest="periodic", action='store_true', default=False, help="enable/disable periodic wrapping (else: extrapolation)")
+ parser.add_argument("-r", "--release", dest="release", action='store_true', default=False, help="continuously add particles via repeatdt (default: False)")
+ parser.add_argument("-rt", "--releasetime", dest="repeatdt", type=int, default=720, help="repeating release rate of added particles in Minutes (default: 720min = 12h)")
+ parser.add_argument("-a", "--aging", dest="aging", action='store_true', default=False, help="Removed aging particles dynamically (default: False)")
+ parser.add_argument("-t", "--time_in_days", dest="time_in_days", type=int, default=1, help="runtime in days (default: 1)")
+ parser.add_argument("-x", "--xarray", dest="use_xarray", action='store_true', default=False, help="use xarray as data backend")
+ parser.add_argument("-w", "--writeout", dest="write_out", action='store_true', default=False, help="write data in outfile")
+ parser.add_argument("-d", "--delParticle", dest="delete_particle", action='store_true', default=False, help="switch to delete a particle (True) or reset a particle (default: False).")
+ parser.add_argument("-A", "--animate", dest="animate", action='store_true', default=False, help="animate the particle trajectories during the run or not (default: False).")
+ parser.add_argument("-V", "--visualize", dest="visualize", action='store_true', default=False, help="Visualize particle trajectories at the end (default: False). Requires -w in addition to take effect.")
+ parser.add_argument("-N", "--n_particles", dest="nparticles", type=str, default="2**6", help="number of particles to generate and advect (default: 2e6)")
+ parser.add_argument("-sN", "--start_n_particles", dest="start_nparticles", type=str, default="96", help="(optional) number of particles generated per release cycle (if --rt is set) (default: 96)")
+ parser.add_argument("-m", "--mode", dest="compute_mode", choices=['jit','scipy'], default="jit", help="computation mode = [JIT, SciPp]")
+ parser.add_argument("-G", "--GC", dest="useGC", action='store_true', default=False, help="using a garbage collector (default: false)")
+ parser.add_argument("--dry", dest="dryrun", action="store_true", default=False, help="Start dry run (no benchmarking and its classes")
+ args = parser.parse_args()
+
+ ParticleSet = BenchmarkParticleSet
+ if args.dryrun:
+ ParticleSet = DryParticleSet
+
+ imageFileName=args.imageFileName
+ periodicFlag=args.periodic
+ backwardSimulation = args.backwards
+ repeatdtFlag=args.release
+ repeatRateMinutes=args.repeatdt
+ time_in_days = args.time_in_days
+ use_xarray = args.use_xarray
+ agingParticles = args.aging
+ with_GC = args.useGC
+ Nparticle = int(float(eval(args.nparticles)))
+ target_N = Nparticle
+ addParticleN = 1
+ # np_scaler = math.sqrt(3.0/2.0)
+ # np_scaler = (3.0 / 2.0)**2.0 # **
+ np_scaler = 3.0 / 2.0
+ # cycle_scaler = math.sqrt(3.0/2.0)
+ # cycle_scaler = (3.0 / 2.0)**2.0 # **
+ # cycle_scaler = 3.0 / 2.0
+ cycle_scaler = 7.0 / 4.0
+ start_N_particles = int(float(eval(args.start_nparticles)))
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ if mpi_comm.Get_rank() == 0:
+ if agingParticles and not repeatdtFlag:
+ sys.stdout.write("N: {} ( {} )\n".format(Nparticle, int(Nparticle * np_scaler)))
+ else:
+ sys.stdout.write("N: {}\n".format(Nparticle))
+ else:
+ if agingParticles and not repeatdtFlag:
+ sys.stdout.write("N: {} ( {} )\n".format(Nparticle, int(Nparticle * np_scaler)))
+ else:
+ sys.stdout.write("N: {}\n".format(Nparticle))
+
+ dt_minutes = 60
+ #dt_minutes = 20
+ #random.seed(123456)
+ nowtime = datetime.datetime.now()
+ ParcelsRandom.seed(nowtime.microsecond)
+
+ branch = "soa_benchmark"
+ computer_env = "local/unspecified"
+ scenario = "perlin"
+ odir = ""
+ if os.uname()[1] in ['science-bs35', 'science-bs36']: # Gemini
+ # odir = "/scratch/{}/experiments".format(os.environ['USER'])
+ odir = "/scratch/{}/experiments".format("ckehl")
+ computer_env = "Gemini"
+ elif fnmatch.fnmatchcase(os.uname()[1], "*.bullx*"): # Cartesius
+ CARTESIUS_SCRATCH_USERNAME = 'ckehluu'
+ odir = "/scratch/shared/{}/experiments".format(CARTESIUS_SCRATCH_USERNAME)
+ computer_env = "Cartesius"
+ else:
+ odir = "/var/scratch/experiments"
+ print("running {} on {} (uname: {}) - branch '{}' - (target) N: {} - argv: {}".format(scenario, computer_env, os.uname()[1], branch, target_N, sys.argv[1:]))
+
+ if os.path.sep in imageFileName:
+ head_dir = os.path.dirname(imageFileName)
+ if head_dir[0] == os.path.sep:
+ odir = head_dir
+ else:
+ odir = os.path.join(odir, head_dir)
+ imageFileName = os.path.split(imageFileName)[1]
+
+ func_time = []
+ mem_used_GB = []
+
+ np.random.seed(0)
+ fieldset = None
+ if use_xarray:
+ fieldset = perlin_fieldset_from_xarray(periodic_wrap=periodicFlag)
+ else:
+ field_fpath = False
+ if args.write_out:
+ field_fpath = os.path.join(odir,"perlin")
+ fieldset = perlin_fieldset_from_numpy(periodic_wrap=periodicFlag, write_out=field_fpath)
+
+ if args.compute_mode == 'scipy':
+ Nparticle = 2**10
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ global_t_0 = ostime.process_time()
+ else:
+ global_t_0 = ostime.process_time()
+
+ simStart = None
+ for f in fieldset.get_fields():
+ if type(f) in [VectorField, NestedField, SummedField]: # or not f.grid.defer_load
+ continue
+ else:
+ if backwardSimulation:
+ simStart=f.grid.time_full[-1]
+ else:
+ simStart = f.grid.time_full[0]
+ break
+
+ if agingParticles:
+ if not repeatdtFlag:
+ Nparticle = int(Nparticle * np_scaler)
+ fieldset.add_constant('life_expectancy', delta(days=time_in_days).total_seconds())
+ if repeatdtFlag:
+ addParticleN = Nparticle/2.0
+ refresh_cycle = (delta(days=time_in_days).total_seconds() / (addParticleN/start_N_particles)) / cycle_scaler
+ if agingParticles:
+ refresh_cycle /= cycle_scaler
+ repeatRateMinutes = int(refresh_cycle/60.0) if repeatRateMinutes == 720 else repeatRateMinutes
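+ # Worked example (illustration only): with "-r -N 2**6" and otherwise default
+ # settings, addParticleN = 32, start_N_particles = 96, cycle_scaler = 7/4 and
+ # time_in_days = 1, so refresh_cycle = 86400 / (32/96) / 1.75 ~= 148114 s and the
+ # default repeatdt of 720 min is replaced by ~2468 min (unless -rt was given).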
+
+ if backwardSimulation:
+ # ==== backward simulation ==== #
+ if agingParticles:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * a, lat=np.random.rand(start_N_particles, 1) * b, time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(int(addParticleN), 1) * a, lat=np.random.rand(int(addParticleN), 1) * b, time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * a, lat=np.random.rand(Nparticle, 1) * b, time=simStart)
+ else:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * a, lat=np.random.rand(start_N_particles, 1) * b, time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(int(addParticleN), 1) * a, lat=np.random.rand(int(addParticleN), 1) * b, time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * a, lat=np.random.rand(Nparticle, 1) * b, time=simStart)
+ else:
+ # ==== forward simulation ==== #
+ if agingParticles:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * a, lat=np.random.rand(start_N_particles, 1) * b, time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(int(addParticleN), 1) * a, lat=np.random.rand(int(addParticleN), 1) * b, time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * a, lat=np.random.rand(Nparticle, 1) * b, time=simStart)
+ else:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * a, lat=np.random.rand(start_N_particles, 1) * b, time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(int(addParticleN), 1) * a, lat=np.random.rand(int(addParticleN), 1) * b, time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * a, lat=np.random.rand(Nparticle, 1) * b, time=simStart)
+
+ output_file = None
+ out_fname = "benchmark_perlin"
+ if args.write_out:
+ if MPI and (MPI.COMM_WORLD.Get_size()>1):
+ out_fname += "_MPI"
+ else:
+ out_fname += "_noMPI"
+ if periodicFlag:
+ out_fname += "_p"
+ out_fname += "_n"+str(Nparticle)
+ if backwardSimulation:
+ out_fname += "_bwd"
+ else:
+ out_fname += "_fwd"
+ if repeatdtFlag:
+ out_fname += "_add"
+ if agingParticles:
+ out_fname += "_age"
+ output_file = pset.ParticleFile(name=os.path.join(odir,out_fname+".nc"), outputdt=delta(hours=24))
+ delete_func = RenewParticle
+ if args.delete_particle:
+ delete_func=DeleteParticle
+ postProcessFuncs = []
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ starttime = ostime.process_time()
+ else:
+ starttime = ostime.process_time()
+ kernels = pset.Kernel(AdvectionRK4,delete_cfiles=True)
+ if agingParticles:
+ kernels += pset.Kernel(initialize, delete_cfiles=True)
+ kernels += pset.Kernel(Age, delete_cfiles=True)
+ if with_GC:
+ postProcessFuncs.append(perIterGC)
+ if backwardSimulation:
+ # ==== backward simulation ==== #
+ if args.animate:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=-dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12), moviedt=delta(hours=6), movie_background_field=fieldset.U)
+ else:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=-dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12))
+ else:
+ # ==== forward simulation ==== #
+ if args.animate:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12), moviedt=delta(hours=6), movie_background_field=fieldset.U)
+ else:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12))
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ endtime = ostime.process_time()
+ else:
+ endtime = ostime.process_time()
+
+ if args.write_out:
+ output_file.close()
+
+ if not args.dryrun:
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ # mpi_comm.Barrier()
+ size_Npart = len(pset.nparticle_log)
+ Npart = pset.nparticle_log.get_param(size_Npart-1)
+ Npart = mpi_comm.reduce(Npart, op=MPI.SUM, root=0)
+ if mpi_comm.Get_rank() == 0:
+ if size_Npart>0:
+ sys.stdout.write("final # particles: {}\n".format( Npart ))
+ sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime-starttime))
+ avg_time = np.mean(np.array(pset.total_log.get_values(), dtype=np.float64))
+ sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time*1000.0))
+ else:
+ size_Npart = len(pset.nparticle_log)
+ Npart = pset.nparticle_log.get_param(size_Npart-1)
+ if size_Npart > 0:
+ sys.stdout.write("final # particles: {}\n".format( Npart ))
+ sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime - starttime))
+ avg_time = np.mean(np.array(pset.total_log.get_values(), dtype=np.float64))
+ sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time * 1000.0))
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ Nparticles = mpi_comm.reduce(np.array(pset.nparticle_log.get_params()), op=MPI.SUM, root=0)
+ Nmem = mpi_comm.reduce(np.array(pset.mem_log.get_params()), op=MPI.SUM, root=0)
+ if mpi_comm.Get_rank() == 0:
+ pset.plot_and_log(memory_used=Nmem, nparticles=Nparticles, target_N=target_N, imageFilePath=imageFileName, odir=odir, xlim_range=[0, 730], ylim_range=[0, 150])
+ else:
+ pset.plot_and_log(target_N=target_N, imageFilePath=imageFileName, odir=odir, xlim_range=[0, 730], ylim_range=[0, 150])
+
+
diff --git a/performance/benchmark_stommel.py b/performance/benchmark_stommel.py
new file mode 100644
index 0000000000..7761d32859
--- /dev/null
+++ b/performance/benchmark_stommel.py
@@ -0,0 +1,508 @@
+"""
+Author: Dr. Christian Kehl
+Date: 11-02-2020
+"""
+
+from parcels import AdvectionEE, AdvectionRK45, AdvectionRK4
+from parcels import FieldSet, ScipyParticle, JITParticle, Variable, AdvectionRK4, StateCode, OperationCode, ErrorCode
+from parcels.particleset_benchmark import ParticleSet_Benchmark as BenchmarkParticleSet
+from parcels.particleset import ParticleSet as DryParticleSet
+# from parcels.kernel_benchmark import Kernel_Benchmark as Kernel
+from parcels.field import VectorField, NestedField, SummedField
+# from parcels import plotTrajectoriesFile_loadedField
+# from parcels import rng as random
+from parcels import ParcelsRandom
+from datetime import timedelta as delta
+import math
+from argparse import ArgumentParser
+import datetime
+import numpy as np
+import xarray as xr
+# import pytest
+import fnmatch
+import gc
+import os
+import time as ostime
+
+import sys
+try:
+ from mpi4py import MPI
+except ImportError:
+ MPI = None
+with_GC = False
+
+pset = None
+
+
+# ptype = {'scipy': ScipyParticle, 'jit': JITParticle}
+method = {'RK4': AdvectionRK4, 'EE': AdvectionEE, 'RK45': AdvectionRK45}
+global_t_0 = 0
+Nparticle = int(math.pow(2,10)) # equals to Nparticle = 1024
+#Nparticle = int(math.pow(2,11)) # equals to Nparticle = 2048
+#Nparticle = int(math.pow(2,12)) # equals to Nparticle = 4096
+#Nparticle = int(math.pow(2,13)) # equals to Nparticle = 8192
+#Nparticle = int(math.pow(2,14)) # equals to Nparticle = 16384
+#Nparticle = int(math.pow(2,15)) # equals to Nparticle = 32768
+#Nparticle = int(math.pow(2,16)) # equals to Nparticle = 65536
+#Nparticle = int(math.pow(2,17)) # equals to Nparticle = 131072
+#Nparticle = int(math.pow(2,18)) # equals to Nparticle = 262144
+#Nparticle = int(math.pow(2,19)) # equals to Nparticle = 524288
+
+a = 10000 * 1e3
+b = 10000 * 1e3
+scalefac = 0.05 # to scale for physically meaningful velocities
+
+def DeleteParticle(particle, fieldset, time):
+ particle.delete()
+
+def RenewParticle(particle, fieldset, time):
+ particle.lat = np.random.rand() * a
+ particle.lon = np.random.rand() * b
+
+def perIterGC():
+ gc.collect()
+
+def stommel_fieldset_from_numpy(xdim=200, ydim=200, periodic_wrap=False, write_out=False):
+ """Simulate a periodic current along a western boundary, with significantly
+ larger velocities along the western edge than the rest of the region
+
+ The original test description can be found in: N. Fabbroni, 2009,
+ Numerical Simulation of Passive tracers dispersion in the sea,
+ Ph.D. dissertation, University of Bologna
+ http://amsdottorato.unibo.it/1733/1/Fabbroni_Nicoletta_Tesi.pdf
+ """
+
+ # Coordinates of the test fieldset (on A-grid in deg)
+ lon = np.linspace(0, a, xdim, dtype=np.float32)
+ lat = np.linspace(0, b, ydim, dtype=np.float32)
+ totime = ydim*24.0*60.0*60.0
+ time = np.linspace(0., totime, ydim, dtype=np.float64)
+
+ # Define arrays U (zonal), V (meridional), W (vertical) and P (sea
+ # surface height) all on A-grid
+ U = np.zeros((lon.size, lat.size, time.size), dtype=np.float32)
+ V = np.zeros((lon.size, lat.size, time.size), dtype=np.float32)
+ P = np.zeros((lon.size, lat.size, time.size), dtype=np.float32)
+
+ beta = 2e-11
+ r = 1/(11.6*86400)
+ es = r/(beta*a)
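+ # Illustrative value: with these parameters es = (1/(11.6*86400)) / (2e-11 * 1e7)
+ # ~= 5.0e-3, i.e. the western boundary layer spans roughly 0.5% of the basin width.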
+
+ for i in range(lon.size):
+ for j in range(lat.size):
+ xi = lon[i] / a
+ yi = lat[j] / b
+ P[i, j, 0] = (1 - math.exp(-xi/es) - xi) * math.pi * np.sin(math.pi*yi)*scalefac
+ U[i, j, 0] = -(1 - math.exp(-xi/es) - xi) * math.pi**2 * np.cos(math.pi*yi)*scalefac
+ V[i, j, 0] = (math.exp(-xi/es)/es - 1) * math.pi * np.sin(math.pi*yi)*scalefac
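+ # Note (added for clarity): in the nondimensional coordinates xi, yi, P acts as a
+ # streamfunction for the expressions above, since U = -dP/dyi and V = dP/dxi.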
+
+ for t in range(1, time.size):
+ for i in range(lon.size):
+ for j in range(lat.size):
+ P[i, j, t] = P[i, j - 1, t - 1]
+ U[i, j, t] = U[i, j - 1, t - 1]
+ V[i, j, t] = V[i, j - 1, t - 1]
+
+ data = {'U': U, 'V': V, 'P': P}
+ dimensions = {'time': time, 'lon': lon, 'lat': lat}
+ fieldset = None
+ if periodic_wrap:
+ fieldset = FieldSet.from_data(data, dimensions, mesh='flat', transpose=True, time_periodic=delta(days=366))
+ else:
+ fieldset = FieldSet.from_data(data, dimensions, mesh='flat', transpose=True, allow_time_extrapolation=True)
+ if write_out:
+ fieldset.write(filename=write_out)
+ return fieldset
+
+
+def stommel_fieldset_from_xarray(xdim=200, ydim=200, periodic_wrap=False):
+ """Simulate a periodic current along a western boundary, with significantly
+ larger velocities along the western edge than the rest of the region
+
+ The original test description can be found in: N. Fabbroni, 2009,
+ Numerical Simulation of Passive tracers dispersion in the sea,
+ Ph.D. dissertation, University of Bologna
+ http://amsdottorato.unibo.it/1733/1/Fabbroni_Nicoletta_Tesi.pdf
+ """
+ # Coordinates of the test fieldset (on A-grid in deg)
+ lon = np.linspace(0., a, xdim, dtype=np.float32)
+ lat = np.linspace(0., b, ydim, dtype=np.float32)
+ totime = ydim*24.0*60.0*60.0
+ time = np.linspace(0., totime, ydim, dtype=np.float64)
+ # Define arrays U (zonal), V (meridional), W (vertical) and P (sea
+ # surface height) all on A-grid
+ U = np.zeros((lon.size, lat.size, time.size), dtype=np.float32)
+ V = np.zeros((lon.size, lat.size, time.size), dtype=np.float32)
+ P = np.zeros((lon.size, lat.size, time.size), dtype=np.float32)
+
+ beta = 2e-11
+ r = 1/(11.6*86400)
+ es = r/(beta*a)
+
+ for i in range(lat.size):
+ for j in range(lon.size):
+ xi = lon[j] / a
+ yi = lat[i] / b
+ P[i, j, 0] = (1 - math.exp(-xi/es) - xi) * math.pi * np.sin(math.pi*yi)*scalefac
+ U[i, j, 0] = -(1 - math.exp(-xi/es) - xi) * math.pi**2 * np.cos(math.pi*yi)*scalefac
+ V[i, j, 0] = (math.exp(-xi/es)/es - 1) * math.pi * np.sin(math.pi*yi)*scalefac
+
+ for t in range(1, time.size):
+ for i in range(lon.size):
+ for j in range(lat.size):
+ P[i, j, t] = P[i, j - 1, t - 1]
+ U[i, j, t] = U[i, j - 1, t - 1]
+ V[i, j, t] = V[i, j - 1, t - 1]
+
+ dimensions = {'time': time, 'lon': lon, 'lat': lat}
+ dims = ('lon', 'lat', 'time')
+ data = {'Uxr': xr.DataArray(U, coords=dimensions, dims=dims),
+ 'Vxr': xr.DataArray(V, coords=dimensions, dims=dims),
+ 'Pxr': xr.DataArray(P, coords=dimensions, dims=dims)}
+ ds = xr.Dataset(data)
+
+ pvariables = {'U': 'Uxr', 'V': 'Vxr', 'P': 'Pxr'}
+ pdimensions = {'time': 'time', 'lat': 'lat', 'lon': 'lon'}
+ if periodic_wrap:
+ return FieldSet.from_xarray_dataset(ds, pvariables, pdimensions, mesh='flat', time_periodic=delta(days=366))
+ else:
+ return FieldSet.from_xarray_dataset(ds, pvariables, pdimensions, mesh='flat', allow_time_extrapolation=True)
+
+
+class StommelParticleJ(JITParticle):
+ p = Variable('p', dtype=np.float32, initial=.0)
+ p0 = Variable('p0', dtype=np.float32, initial=.0)
+
+class StommelParticleS(ScipyParticle):
+ p = Variable('p', dtype=np.float32, initial=.0)
+ p0 = Variable('p0', dtype=np.float32, initial=.0)
+
+class AgeParticle_JIT(StommelParticleJ):
+ age = Variable('age', dtype=np.float64, initial=0.0)
+ life_expectancy = Variable('life_expectancy', dtype=np.float64, initial=np.finfo(np.float64).max)
+ initialized_dynamic = Variable('initialized_dynamic', dtype=np.int32, initial=0)
+
+class AgeParticle_SciPy(StommelParticleS):
+ age = Variable('age', dtype=np.float64, initial=0.0)
+ life_expectancy = Variable('life_expectancy', dtype=np.float64, initial=np.finfo(np.float64).max)
+ initialized_dynamic = Variable('initialized_dynamic', dtype=np.int32, initial=0)
+
+def initialize(particle, fieldset, time):
+ if particle.initialized_dynamic < 1:
+ particle.life_expectancy = time+ParcelsRandom.uniform(.0, fieldset.life_expectancy) * math.sqrt(3.0/2.0)
+ particle.initialized_dynamic = 1
+
+def Age(particle, fieldset, time):
+ if particle.state == StateCode.Evaluate:
+ particle.age = particle.age + math.fabs(particle.dt)
+
+ if particle.age > particle.life_expectancy:
+ particle.delete()
+
+ptype = {'scipy': StommelParticleS, 'jit': StommelParticleJ}
+age_ptype = {'scipy': AgeParticle_SciPy, 'jit': AgeParticle_JIT}
+
+if __name__=='__main__':
+ parser = ArgumentParser(description="Example of particle advection using in-memory stommel test case")
+ parser.add_argument("-i", "--imageFileName", dest="imageFileName", type=str, default="mpiChunking_plot_MPI.png", help="image file name of the plot")
+ parser.add_argument("-b", "--backwards", dest="backwards", action='store_true', default=False, help="enable/disable running the simulation backwards")
+ parser.add_argument("-p", "--periodic", dest="periodic", action='store_true', default=False, help="enable/disable periodic wrapping (else: extrapolation)")
+ parser.add_argument("-r", "--release", dest="release", action='store_true', default=False, help="continuously add particles via repeatdt (default: False)")
+ parser.add_argument("-rt", "--releasetime", dest="repeatdt", type=int, default=720, help="repeating release rate of added particles in Minutes (default: 720min = 12h)")
+ parser.add_argument("-a", "--aging", dest="aging", action='store_true', default=False, help="Removed aging particles dynamically (default: False)")
+ parser.add_argument("-t", "--time_in_days", dest="time_in_days", type=int, default=1, help="runtime in days (default: 1)")
+ parser.add_argument("-x", "--xarray", dest="use_xarray", action='store_true', default=False, help="use xarray as data backend")
+ parser.add_argument("-w", "--writeout", dest="write_out", action='store_true', default=False, help="write data in outfile")
+ parser.add_argument("-d", "--delParticle", dest="delete_particle", action='store_true', default=False, help="switch to delete a particle (True) or reset a particle (default: False).")
+ parser.add_argument("-A", "--animate", dest="animate", action='store_true', default=False, help="animate the particle trajectories during the run or not (default: False).")
+ parser.add_argument("-V", "--visualize", dest="visualize", action='store_true', default=False, help="Visualize particle trajectories at the end (default: False). Requires -w in addition to take effect.")
+ parser.add_argument("-N", "--n_particles", dest="nparticles", type=str, default="2**6", help="number of particles to generate and advect (default: 2e6)")
+ parser.add_argument("-sN", "--start_n_particles", dest="start_nparticles", type=str, default="96", help="(optional) number of particles generated per release cycle (if --rt is set) (default: 96)")
+ parser.add_argument("-m", "--mode", dest="compute_mode", choices=['jit','scipy'], default="jit", help="computation mode = [JIT, SciPp]")
+ parser.add_argument("-G", "--GC", dest="useGC", action='store_true', default=False, help="using a garbage collector (default: false)")
+ args = parser.parse_args()
+
+ ParticleSet = BenchmarkParticleSet
+ if args.dryrun:
+ ParticleSet = DryParticleSet
+
+ imageFileName=args.imageFileName
+ periodicFlag=args.periodic
+ backwardSimulation = args.backwards
+ repeatdtFlag=args.release
+ repeatRateMinutes=args.repeatdt
+ time_in_days = args.time_in_days
+ use_xarray = args.use_xarray
+ agingParticles = args.aging
+ with_GC = args.useGC
+ Nparticle = int(float(eval(args.nparticles)))
+ target_N = Nparticle
+ start_N_particles = int(float(eval(args.start_nparticles)))
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ if mpi_comm.Get_rank() == 0:
+ if agingParticles and not repeatdtFlag:
+ sys.stdout.write("N: {} ( {} )\n".format(Nparticle, int(Nparticle * ((3.0 / 2.0)**2.0))))
+ else:
+ sys.stdout.write("N: {}\n".format(Nparticle))
+ else:
+ if agingParticles and not repeatdtFlag:
+ sys.stdout.write("N: {} ( {} )\n".format(Nparticle, int(Nparticle * ((3.0 / 2.0)**2.0))))
+ else:
+ sys.stdout.write("N: {}\n".format(Nparticle))
+
+ dt_minutes = 60
+ #dt_minutes = 20
+ #random.seed(123456)
+ nowtime = datetime.datetime.now()
+ ParcelsRandom.seed(nowtime.microsecond)
+
+ branch = "soa_benchmark"
+ computer_env = "local/unspecified"
+ scenario = "stommel"
+ odir = ""
+ if os.uname()[1] in ['science-bs35', 'science-bs36']: # Gemini
+ # odir = "/scratch/{}/experiments".format(os.environ['USER'])
+ odir = "/scratch/{}/experiments".format("ckehl")
+ computer_env = "Gemini"
+ elif fnmatch.fnmatchcase(os.uname()[1], "*.bullx*"): # Cartesius
+ CARTESIUS_SCRATCH_USERNAME = 'ckehluu'
+ odir = "/scratch/shared/{}/experiments".format(CARTESIUS_SCRATCH_USERNAME)
+ computer_env = "Cartesius"
+ else:
+ odir = "/var/scratch/experiments"
+ print("running {} on {} (uname: {}) - branch '{}' - (target) N: {} - argv: {}".format(scenario, computer_env, os.uname()[1], branch, target_N, sys.argv[1:]))
+
+ func_time = []
+ mem_used_GB = []
+
+ np.random.seed(0)
+ fieldset = None
+ if use_xarray:
+ fieldset = stommel_fieldset_from_xarray(200, 200, periodic_wrap=periodicFlag)
+ else:
+ field_fpath = False
+ if args.write_out:
+ field_fpath = os.path.join(odir,"stommel")
+ fieldset = stommel_fieldset_from_numpy(200, 200, periodic_wrap=periodicFlag, write_out=field_fpath)
+
+ if args.compute_mode == 'scipy':
+ Nparticle = 2**10
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ # global_t_0 = ostime.time()
+ # global_t_0 = MPI.Wtime()
+ global_t_0 = ostime.process_time()
+ else:
+ # global_t_0 = ostime.time()
+ global_t_0 = ostime.process_time()
+
+ simStart = None
+ for f in fieldset.get_fields():
+ if type(f) in [VectorField, NestedField, SummedField]: # or not f.grid.defer_load
+ continue
+ else:
+ if backwardSimulation:
+ simStart=f.grid.time_full[-1]
+ else:
+ simStart = f.grid.time_full[0]
+ break
+
+ addParticleN = 1
+ # np_scaler = math.sqrt(3.0/2.0)
+ np_scaler = (3.0 / 2.0)**2.0 # **
+ # np_scaler = 3.0 / 2.0
+ # cycle_scaler = math.sqrt(3.0/2.0)
+ cycle_scaler = (3.0 / 2.0)**2.0 # **
+ # cycle_scaler = 3.0 / 2.0
+ if agingParticles:
+ if not repeatdtFlag:
+ Nparticle = int(Nparticle * np_scaler)
+ fieldset.add_constant('life_expectancy', delta(days=time_in_days).total_seconds())
+ if repeatdtFlag:
+ addParticleN = Nparticle/2.0
+ refresh_cycle = (delta(days=time_in_days).total_seconds() / (addParticleN/start_N_particles)) / cycle_scaler
+ if agingParticles:
+ refresh_cycle /= cycle_scaler
+ repeatRateMinutes = int(refresh_cycle/60.0) if repeatRateMinutes == 720 else repeatRateMinutes
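+ # Worked example (illustration only): with "-r -N 2**6" and defaults otherwise,
+ # addParticleN = 32, start_N_particles = 96 and cycle_scaler = 2.25, so
+ # refresh_cycle = 86400 / (32/96) / 2.25 = 115200 s and the default repeatdt of
+ # 720 min becomes 1920 min here.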
+
+ if backwardSimulation:
+ # ==== backward simulation ==== #
+ if agingParticles:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * a, lat=np.random.rand(start_N_particles, 1) * b, time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(int(Nparticle/2.0), 1) * a, lat=np.random.rand(int(Nparticle/2.0), 1) * b, time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * a, lat=np.random.rand(Nparticle, 1) * b, time=simStart)
+ else:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * a, lat=np.random.rand(start_N_particles, 1) * b, time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(int(Nparticle/2.0), 1) * a, lat=np.random.rand(int(Nparticle/2.0), 1) * b, time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * a, lat=np.random.rand(Nparticle, 1) * b, time=simStart)
+ else:
+ # ==== forward simulation ==== #
+ if agingParticles:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * a, lat=np.random.rand(start_N_particles, 1) * b, time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(int(Nparticle/2.0), 1) * a, lat=np.random.rand(int(Nparticle/2.0), 1) * b, time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=age_ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * a, lat=np.random.rand(Nparticle, 1) * b, time=simStart)
+ else:
+ if repeatdtFlag:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(start_N_particles, 1) * a, lat=np.random.rand(start_N_particles, 1) * b, time=simStart, repeatdt=delta(minutes=repeatRateMinutes))
+ psetA = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(int(Nparticle/2.0), 1) * a, lat=np.random.rand(int(Nparticle/2.0), 1) * b, time=simStart)
+ pset.add(psetA)
+ else:
+ pset = ParticleSet(fieldset=fieldset, pclass=ptype[(args.compute_mode).lower()], lon=np.random.rand(Nparticle, 1) * a, lat=np.random.rand(Nparticle, 1) * b, time=simStart)
+
+ # if agingParticles:
+ # for p in pset.particles:
+ # p.life_expectancy = delta(days=time_in_days).total_seconds()
+ # else:
+ # for p in pset.particles:
+ # p.initialized_dynamic = 1
+
+ # available_n_particles = len(pset)
+ # life = np.random.uniform(delta(hours=24).total_seconds(), delta(days=time_in_days).total_seconds(), available_n_particles)
+ # i=0
+ # for p in pset.particles:
+ # p.life_expectancy = life[i]
+ # i += 1
+
+ output_file = None
+ out_fname = "benchmark_stommel"
+ if args.write_out:
+ if MPI and (MPI.COMM_WORLD.Get_size()>1):
+ out_fname += "_MPI"
+ else:
+ out_fname += "_noMPI"
+ if periodicFlag:
+ out_fname += "_p"
+ out_fname += "_n"+str(Nparticle)
+ if backwardSimulation:
+ out_fname += "_bwd"
+ else:
+ out_fname += "_fwd"
+ if repeatdtFlag:
+ out_fname += "_add"
+ if agingParticles:
+ out_fname += "_age"
+ output_file = pset.ParticleFile(name=os.path.join(odir,out_fname+".nc"), outputdt=delta(hours=24))
+ delete_func = RenewParticle
+ if args.delete_particle:
+ delete_func=DeleteParticle
+ postProcessFuncs = []
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ # starttime = ostime.time()
+ # starttime = MPI.Wtime()
+ starttime = ostime.process_time()
+ else:
+ # starttime = ostime.time()
+ starttime = ostime.process_time()
+ kernels = pset.Kernel(AdvectionRK4,delete_cfiles=True)
+ if agingParticles:
+ kernels += pset.Kernel(initialize, delete_cfiles=True)
+ kernels += pset.Kernel(Age, delete_cfiles=True)
+ if with_GC:
+ postProcessFuncs.append(perIterGC)
+ if backwardSimulation:
+ # ==== backward simulation ==== #
+ if args.animate:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=-dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12), moviedt=delta(hours=6), movie_background_field=fieldset.U)
+ else:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=-dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12))
+ else:
+ # ==== forward simulation ==== #
+ if args.animate:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12), moviedt=delta(hours=6), movie_background_field=fieldset.U)
+ else:
+ pset.execute(kernels, runtime=delta(days=time_in_days), dt=delta(minutes=dt_minutes), output_file=output_file, recovery={ErrorCode.ErrorOutOfBounds: delete_func}, postIterationCallbacks=postProcessFuncs, callbackdt=delta(hours=12))
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ mpi_rank = mpi_comm.Get_rank()
+ if mpi_rank==0:
+ # endtime = ostime.time()
+ # endtime = MPI.Wtime()
+ endtime = ostime.process_time()
+ else:
+ #endtime = ostime.time()
+ endtime = ostime.process_time()
+
+ if args.write_out:
+ output_file.close()
+
+ # if MPI:
+ # mpi_comm = MPI.COMM_WORLD
+ # if mpi_comm.Get_rank() == 0:
+ # dt_time = []
+ # for i in range(len(perflog.times_steps)):
+ # if i==0:
+ # dt_time.append( (perflog.times_steps[i]-global_t_0) )
+ # else:
+ # dt_time.append( (perflog.times_steps[i]-perflog.times_steps[i-1]) )
+ # sys.stdout.write("final # particles: {}\n".format(perflog.Nparticles_step[len(perflog.Nparticles_step)-1]))
+ # sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime-starttime))
+ # avg_time = np.mean(np.array(dt_time, dtype=np.float64))
+ # sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time*1000.0))
+ # else:
+ # dt_time = []
+ # for i in range(len(perflog.times_steps)):
+ # if i == 0:
+ # dt_time.append((perflog.times_steps[i] - global_t_0))
+ # else:
+ # dt_time.append((perflog.times_steps[i] - perflog.times_steps[i - 1]))
+ # sys.stdout.write("final # particles: {}\n".format(perflog.Nparticles_step[len(perflog.Nparticles_step)-1]))
+ # sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime - starttime))
+ # avg_time = np.mean(np.array(dt_time, dtype=np.float64))
+ # sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time * 1000.0))
+
+ size_Npart = len(pset.nparticle_log)
+ Npart = pset.nparticle_log.get_param(size_Npart-1)
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ Npart = mpi_comm.reduce(Npart, op=MPI.SUM, root=0)
+ if mpi_comm.Get_rank() == 0:
+ if size_Npart>0:
+ sys.stdout.write("final # particles: {}\n".format( Npart ))
+ sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime-starttime))
+ avg_time = np.mean(np.array(pset.total_log.get_values(), dtype=np.float64))
+ sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time*1000.0))
+ else:
+ if size_Npart > 0:
+ sys.stdout.write("final # particles: {}\n".format( Npart ))
+ sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime - starttime))
+ avg_time = np.mean(np.array(pset.total_log.get_values(), dtype=np.float64))
+ sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time * 1000.0))
+
+ # if args.write_out:
+ # output_file.close()
+ # if args.visualize:
+ # if MPI:
+ # mpi_comm = MPI.COMM_WORLD
+ # if mpi_comm.Get_rank() == 0:
+ # plotTrajectoriesFile_loadedField(os.path.join(odir, out_fname+".nc"), tracerfield=fieldset.U)
+ # else:
+ # plotTrajectoriesFile_loadedField(os.path.join(odir, out_fname+".nc"),tracerfield=fieldset.U)
+
+ if MPI:
+ mpi_comm = MPI.COMM_WORLD
+ # mpi_comm.Barrier()
+ Nparticles = mpi_comm.reduce(np.array(pset.nparticle_log.get_params()), op=MPI.SUM, root=0)
+ Nmem = mpi_comm.reduce(np.array(pset.mem_log.get_params()), op=MPI.SUM, root=0)
+ if mpi_comm.Get_rank() == 0:
+ pset.plot_and_log(memory_used=Nmem, nparticles=Nparticles, target_N=target_N, imageFilePath=imageFileName, odir=odir, xlim_range=[0, 730], ylim_range=[0, 150])
+ else:
+ pset.plot_and_log(target_N=target_N, imageFilePath=imageFileName, odir=odir, xlim_range=[0, 730], ylim_range=[0, 150])
+
+