From 0ba246cfe2383ae35af28d2ab5e77b543fe93378 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Sat, 20 Jun 2020 13:01:22 -0500 Subject: [PATCH 1/5] Make CHUNKSIZE an index attribute --- yt/frontends/sdf/io.py | 6 ++---- yt/frontends/tipsy/io.py | 15 ++++++--------- yt/geometry/particle_geometry_handler.py | 11 ++++++++--- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/yt/frontends/sdf/io.py b/yt/frontends/sdf/io.py index 71b38f23f71..c1b25bb7be1 100644 --- a/yt/frontends/sdf/io.py +++ b/yt/frontends/sdf/io.py @@ -7,8 +7,6 @@ from yt.utilities.exceptions import YTDomainOverflow from yt.utilities.lib.geometry_utils import compute_morton -CHUNKSIZE = 32**3 - class IOHandlerSDF(BaseIOHandler): _dataset_type = "sdf_particles" @@ -64,7 +62,7 @@ def _initialize_index(self, data_file, regions): morton = np.empty(pcount, dtype='uint64') ind = 0 while ind < pcount: - npart = min(CHUNKSIZE, pcount - ind) + npart = min(self.ds.index._chunksize, pcount - ind) pos = np.empty((npart, 3), dtype=x.dtype) pos[:,0] = x[ind:ind+npart] pos[:,1] = y[ind:ind+npart] @@ -74,7 +72,7 @@ def _initialize_index(self, data_file, regions): pos[:,0], pos[:,1], pos[:,2], data_file.ds.domain_left_edge, data_file.ds.domain_right_edge) - ind += CHUNKSIZE + ind += self.ds.index._chunksize return morton def _identify_fields(self, data_file): diff --git a/yt/frontends/tipsy/io.py b/yt/frontends/tipsy/io.py index 11dcceffce8..a36d9fd1387 100644 --- a/yt/frontends/tipsy/io.py +++ b/yt/frontends/tipsy/io.py @@ -4,8 +4,6 @@ import os import struct -from yt.geometry.particle_geometry_handler import \ - CHUNKSIZE from yt.frontends.sph.io import \ IOHandlerSPH from yt.frontends.tipsy.definitions import \ @@ -26,8 +24,6 @@ class IOHandlerTipsyBinary(IOHandlerSPH): _ptypes = ("Gas", "DarkMatter", "Stars") - _chunksize = CHUNKSIZE - _aux_fields = None _fields = (("Gas", "Mass"), ("Gas", "Coordinates"), @@ -96,6 +92,7 @@ def _read_particle_coords(self, chunks, ptf): for chunk in chunks: for obj in chunk.objs: data_files.update(obj.data_files) + chunksize = self.ds.index._chunksize for data_file in sorted(data_files, key=lambda x: (x.filename, x.start)): poff = data_file.field_offsets tp = data_file.total_particles @@ -107,7 +104,7 @@ def _read_particle_coords(self, chunks, ptf): f.seek(poff[ptype]) total = 0 while total < tp[ptype]: - count = min(self._chunksize, tp[ptype] - total) + count = min(chunksize, tp[ptype] - total) p = np.fromfile(f, self._pdtypes[ptype], count=count) total += p.size d = [p["Coordinates"][ax].astype("float64") @@ -180,7 +177,7 @@ def _read_particle_fields(self, chunks, ptf, selector): continue f.seek(poff[ptype]) afields = list(set(field_list).intersection(self._aux_fields)) - count = min(self._chunksize, tp[ptype]) + count = min(self.ds.index._chunksize, tp[ptype]) p = np.fromfile(f, self._pdtypes[ptype], count=count) auxdata = [] for afield in afields: @@ -256,7 +253,7 @@ def _update_domain(self, data_file): continue stop = ind + count while ind < stop: - c = min(CHUNKSIZE, stop - ind) + c = min(self.ds.index._chunksize, stop - ind) pp = np.fromfile(f, dtype=self._pdtypes[ptype], count=c) np.minimum(mi, [pp["Coordinates"]["x"].min(), @@ -454,10 +451,10 @@ def _calculate_particle_offsets_aux(self, data_file): for i, ptype in enumerate(self._ptypes): if data_file.total_particles[ptype] == 0: continue - elif params[npart_mapping[ptype]] > CHUNKSIZE: + elif params[npart_mapping[ptype]] > self.ds.index._chunksize: for j in range(i): npart = params[npart_mapping[self._ptypes[j]]] - if npart > CHUNKSIZE: + if npart > self.ds.index._chunksize: pos += npart*size pos += data_file.start*size aux_fields_offsets[afield][ptype] = pos diff --git a/yt/geometry/particle_geometry_handler.py b/yt/geometry/particle_geometry_handler.py index 03ac22e961c..e6478a2a4e2 100644 --- a/yt/geometry/particle_geometry_handler.py +++ b/yt/geometry/particle_geometry_handler.py @@ -16,10 +16,10 @@ from yt.data_objects.particle_container import ParticleContainer from yt.utilities.lib.fnv_hash import fnv_hash -CHUNKSIZE = 64**3 class ParticleIndex(Index): """The Index subclass for particle datasets""" + _chunksize = 64**3 def __init__(self, ds, dataset_type): self.dataset_type = dataset_type self.dataset = weakref.proxy(ds) @@ -54,15 +54,20 @@ def _setup_filenames(self): fi = 0 for i in range(int(ndoms)): start = 0 - end = start + CHUNKSIZE + if self._chunk_size > 0: + end = start + self._chunk_size + else: + end = None while 1: df = cls(self.dataset, self.io, template % {'num':i}, fi, (start, end)) if max(df.total_particles.values()) == 0: break fi += 1 self.data_files.append(df) + if self._chunk_size <= 0: + break start = end - end += CHUNKSIZE + end += self._chunk_size self.total_particles = sum( sum(d.total_particles.values()) for d in self.data_files) From 077c461ae768364c03e3f8a2fd153f58c6d73a75 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Mon, 22 Jun 2020 11:15:45 -0500 Subject: [PATCH 2/5] half-finished rename --- yt/geometry/particle_geometry_handler.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yt/geometry/particle_geometry_handler.py b/yt/geometry/particle_geometry_handler.py index e6478a2a4e2..6bcd4cb6121 100644 --- a/yt/geometry/particle_geometry_handler.py +++ b/yt/geometry/particle_geometry_handler.py @@ -54,8 +54,8 @@ def _setup_filenames(self): fi = 0 for i in range(int(ndoms)): start = 0 - if self._chunk_size > 0: - end = start + self._chunk_size + if self._chunksize > 0: + end = start + self._chunksize else: end = None while 1: @@ -64,10 +64,10 @@ def _setup_filenames(self): break fi += 1 self.data_files.append(df) - if self._chunk_size <= 0: + if self._chunksize <= 0: break start = end - end += self._chunk_size + end += self._chunksize self.total_particles = sum( sum(d.total_particles.values()) for d in self.data_files) From 2519f0f7ef13273169299ca8e213cc1241ca62ad Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Sun, 19 Jul 2020 14:13:35 -0500 Subject: [PATCH 3/5] Changing to a property --- yt/frontends/flash/io.py | 9 ++++++--- yt/frontends/sdf/io.py | 4 ++-- yt/frontends/tipsy/io.py | 10 +++++----- yt/geometry/particle_geometry_handler.py | 18 ++++++++++-------- 4 files changed, 23 insertions(+), 18 deletions(-) diff --git a/yt/frontends/flash/io.py b/yt/frontends/flash/io.py index e72ca882750..fdcc66cf11c 100644 --- a/yt/frontends/flash/io.py +++ b/yt/frontends/flash/io.py @@ -164,7 +164,10 @@ def __init__(self, ds): self._position_fields = [ self._particle_fields["particle_pos%s" % ax] for ax in "xyz" ] - self._chunksize = 32 ** 3 + + @property + def chunksize(self): + return 32 ** 3 def _read_fluid_selection(self, chunks, selector, fields, size): raise NotImplementedError @@ -224,7 +227,7 @@ def _initialize_index(self, data_file, regions): morton = np.empty(pcount, dtype="uint64") ind = 0 while ind < pcount: - npart = min(self._chunksize, pcount - ind) + npart = min(self.chunksize, pcount - ind) pos = np.empty((npart, 3), dtype="=f8") pos[:, 0] = p_fields[ind : ind + npart, px] pos[:, 1] = p_fields[ind : ind + npart, py] @@ -237,7 +240,7 @@ def _initialize_index(self, data_file, regions): data_file.ds.domain_left_edge, data_file.ds.domain_right_edge, ) - ind += self._chunksize + ind += self.chunksize return morton _pcount = None diff --git a/yt/frontends/sdf/io.py b/yt/frontends/sdf/io.py index b2ef9306333..f1e5795365f 100644 --- a/yt/frontends/sdf/io.py +++ b/yt/frontends/sdf/io.py @@ -65,7 +65,7 @@ def _initialize_index(self, data_file, regions): morton = np.empty(pcount, dtype="uint64") ind = 0 while ind < pcount: - npart = min(self.ds.index._chunksize, pcount - ind) + npart = min(self.ds.index.chunksize, pcount - ind) pos = np.empty((npart, 3), dtype=x.dtype) pos[:, 0] = x[ind : ind + npart] pos[:, 1] = y[ind : ind + npart] @@ -78,7 +78,7 @@ def _initialize_index(self, data_file, regions): data_file.ds.domain_left_edge, data_file.ds.domain_right_edge, ) - ind += self.ds.index._chunksize + ind += self.ds.index.chunksize return morton def _identify_fields(self, data_file): diff --git a/yt/frontends/tipsy/io.py b/yt/frontends/tipsy/io.py index bc369251157..a6be6b9e407 100644 --- a/yt/frontends/tipsy/io.py +++ b/yt/frontends/tipsy/io.py @@ -90,7 +90,7 @@ def _read_particle_coords(self, chunks, ptf): for chunk in chunks: for obj in chunk.objs: data_files.update(obj.data_files) - chunksize = self.ds.index._chunksize + chunksize = self.ds.index.chunksize for data_file in sorted(data_files, key=lambda x: (x.filename, x.start)): poff = data_file.field_offsets tp = data_file.total_particles @@ -176,7 +176,7 @@ def _read_particle_fields(self, chunks, ptf, selector): continue f.seek(poff[ptype]) afields = list(set(field_list).intersection(self._aux_fields)) - count = min(self.ds.index._chunksize, tp[ptype]) + count = min(self.ds.index.chunksize, tp[ptype]) p = np.fromfile(f, self._pdtypes[ptype], count=count) auxdata = [] for afield in afields: @@ -252,7 +252,7 @@ def _update_domain(self, data_file): continue stop = ind + count while ind < stop: - c = min(self.ds.index._chunksize, stop - ind) + c = min(self.ds.index.chunksize, stop - ind) pp = np.fromfile(f, dtype=self._pdtypes[ptype], count=c) np.minimum( mi, @@ -467,10 +467,10 @@ def _calculate_particle_offsets_aux(self, data_file): for i, ptype in enumerate(self._ptypes): if data_file.total_particles[ptype] == 0: continue - elif params[npart_mapping[ptype]] > self.ds.index._chunksize: + elif params[npart_mapping[ptype]] > self.ds.index.chunksize: for j in range(i): npart = params[npart_mapping[self._ptypes[j]]] - if npart > self.ds.index._chunksize: + if npart > self.ds.index.chunksize: pos += npart * size pos += data_file.start * size aux_fields_offsets[afield][ptype] = pos diff --git a/yt/geometry/particle_geometry_handler.py b/yt/geometry/particle_geometry_handler.py index c6729f392f9..4a0769c0007 100644 --- a/yt/geometry/particle_geometry_handler.py +++ b/yt/geometry/particle_geometry_handler.py @@ -13,12 +13,9 @@ from yt.utilities.lib.fnv_hash import fnv_hash from yt.utilities.logger import ytLogger as mylog -CHUNKSIZE = 64 ** 3 - class ParticleIndex(Index): """The Index subclass for particle datasets""" - _chunksize = 64**3 def __init__(self, ds, dataset_type): self.dataset_type = dataset_type @@ -46,6 +43,11 @@ def _get_particle_type_counts(self): def convert(self, unit): return self.dataset.conversion_factors[unit] + @property + def chunksize(self): + # This can be overridden in subclasses + return 64 ** 3 + def _setup_filenames(self): template = self.dataset.filename_template ndoms = self.dataset.file_count @@ -54,20 +56,20 @@ def _setup_filenames(self): fi = 0 for i in range(int(ndoms)): start = 0 - if self._chunksize > 0: - end = start + self._chunksize + if self.chunksize > 0: + end = start + self.chunksize else: end = None while True: - df = cls(self.dataset, self.io, template % {'num':i}, fi, (start, end)) + df = cls(self.dataset, self.io, template % {"num": i}, fi, (start, end)) if max(df.total_particles.values()) == 0: break fi += 1 self.data_files.append(df) - if self._chunksize <= 0: + if self.chunksize <= 0: break start = end - end += self._chunksize + end += self.chunksize self.total_particles = sum( sum(d.total_particles.values()) for d in self.data_files ) From 20a37e478e04b643826f381eef2639b77a8a9867 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Fri, 18 Sep 2020 10:11:29 -0500 Subject: [PATCH 4/5] Fix black manually --- yt/geometry/particle_geometry_handler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/yt/geometry/particle_geometry_handler.py b/yt/geometry/particle_geometry_handler.py index 6f3b630db0c..76a60a90c16 100644 --- a/yt/geometry/particle_geometry_handler.py +++ b/yt/geometry/particle_geometry_handler.py @@ -43,7 +43,6 @@ def _get_particle_type_counts(self): def convert(self, unit): return self.dataset.conversion_factors[unit] - @property def chunksize(self): # This can be overridden in subclasses From e4d4d68560cc713a3dbed3c4abff400ef04e9be9 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Fri, 22 Jan 2021 08:38:53 -0600 Subject: [PATCH 5/5] Using hardcoded local chunksize for tipsy update domain code --- yt/frontends/tipsy/io.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/yt/frontends/tipsy/io.py b/yt/frontends/tipsy/io.py index b1e170d0dd9..2172a5766f6 100644 --- a/yt/frontends/tipsy/io.py +++ b/yt/frontends/tipsy/io.py @@ -232,6 +232,16 @@ def _update_domain(self, data_file): """ ds = data_file.ds ind = 0 + # NOTE: + # We hardcode this value here because otherwise we get into a + # situation where we require the existence of index before we + # can successfully instantiate it, or where we are calling it + # from within its instantiation. + # + # Because this value is not propagated later on, and does not + # impact the construction of the bitmap indices, it should be + # acceptable to just use a reasonable number here. + chunksize = 64 ** 3 # Check to make sure that the domain hasn't already been set # by the parameter file if np.all(np.isfinite(ds.domain_left_edge)) and np.all( @@ -251,7 +261,7 @@ def _update_domain(self, data_file): continue stop = ind + count while ind < stop: - c = min(self.ds.index.chunksize, stop - ind) + c = min(chunksize, stop - ind) pp = np.fromfile(f, dtype=self._pdtypes[ptype], count=c) np.minimum( mi,