From db0b8b3445c0a6a387875cb1bfa12da0b88c3fe6 Mon Sep 17 00:00:00 2001
From: Bouwe Andela <b.andela@esciencecenter.nl>
Date: Fri, 15 Nov 2024 12:55:42 +0100
Subject: [PATCH] Use custom tokenization function for NetCDFDataProxy objects

---
 lib/iris/fileformats/netcdf/_thread_safe_nc.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/lib/iris/fileformats/netcdf/_thread_safe_nc.py b/lib/iris/fileformats/netcdf/_thread_safe_nc.py
index 3a556f5447..9956f1480a 100644
--- a/lib/iris/fileformats/netcdf/_thread_safe_nc.py
+++ b/lib/iris/fileformats/netcdf/_thread_safe_nc.py
@@ -327,6 +327,12 @@ def ndim(self):
     def dask_meta(self):
         return np.ma.array(np.empty((0,) * self.ndim, dtype=self.dtype), mask=True)
 
+    def __dask_tokenize__(self):
+        # Dask calls this hook to obtain a token that identifies the "array".
+        # Building it from the class name, path and variable name is slightly faster
+        # than general object tokenization, which speeds up loading small NetCDF files.
+        return f"<{self.__class__.__name__} path={self.path!r} variable_name={self.variable_name!r}>"
+
     def __getitem__(self, keys):
         # Using a DatasetWrapper causes problems with invalid ID's and the
         # netCDF4 library, presumably because __getitem__ gets called so many