From db0b8b3445c0a6a387875cb1bfa12da0b88c3fe6 Mon Sep 17 00:00:00 2001
From: Bouwe Andela
Date: Fri, 15 Nov 2024 12:55:42 +0100
Subject: [PATCH] Use custom tokenization function for NetCDFDataProxy objects

---
 lib/iris/fileformats/netcdf/_thread_safe_nc.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/lib/iris/fileformats/netcdf/_thread_safe_nc.py b/lib/iris/fileformats/netcdf/_thread_safe_nc.py
index 3a556f5447..9956f1480a 100644
--- a/lib/iris/fileformats/netcdf/_thread_safe_nc.py
+++ b/lib/iris/fileformats/netcdf/_thread_safe_nc.py
@@ -327,6 +327,12 @@ def ndim(self):
     def dask_meta(self):
         return np.ma.array(np.empty((0,) * self.ndim, dtype=self.dtype), mask=True)
 
+    def __dask_tokenize__(self):
+        # Dask uses this function to uniquely identify the "array".
+        # A custom function is slightly faster than general object tokenization,
+        # which improves the speed of loading small NetCDF files.
+        return f"<{self.__class__.__name__} path={self.path!r} variable_name={self.variable_name!r}>"
+
     def __getitem__(self, keys):
         # Using a DatasetWrapper causes problems with invalid ID's and the
         # netCDF4 library, presumably because __getitem__ gets called so many