Make CHUNKSIZE into an index property #2658

Merged Jan 22, 2021 (8 commits)
Changes from all commits
9 changes: 6 additions & 3 deletions yt/frontends/flash/io.py
@@ -162,7 +162,10 @@ def __init__(self, ds):
         self._position_fields = [
             self._particle_fields[f"particle_pos{ax}"] for ax in "xyz"
         ]
-        self._chunksize = 32 ** 3
+
+    @property
+    def chunksize(self):
+        return 32 ** 3
 
     def _read_fluid_selection(self, chunks, selector, fields, size):
         raise NotImplementedError
@@ -222,7 +225,7 @@ def _initialize_index(self, data_file, regions):
         morton = np.empty(pcount, dtype="uint64")
         ind = 0
         while ind < pcount:
-            npart = min(self._chunksize, pcount - ind)
+            npart = min(self.chunksize, pcount - ind)
             pos = np.empty((npart, 3), dtype="=f8")
             pos[:, 0] = p_fields[ind : ind + npart, px]
             pos[:, 1] = p_fields[ind : ind + npart, py]
@@ -235,7 +238,7 @@ def _initialize_index(self, data_file, regions):
             data_file.ds.domain_left_edge,
             data_file.ds.domain_right_edge,
         )
-        ind += self._chunksize
+        ind += self.chunksize
         return morton
 
     _pcount = None
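This first file shows the core pattern of the PR: the per-instance `_chunksize` attribute set in `__init__` becomes a read-only `chunksize` property. A minimal standalone sketch of the idea (the two-class hierarchy below is illustrative, not yt's actual class tree; only the property itself mirrors the real code):

class IOHandlerBase:
    @property
    def chunksize(self):
        # Generic default; a subclass overrides by redefining the property.
        return 64 ** 3


class IOHandlerFLASHParticle(IOHandlerBase):
    @property
    def chunksize(self):
        # FLASH reads particle data in smaller chunks.
        return 32 ** 3


handler = IOHandlerFLASHParticle()
assert handler.chunksize == 32 ** 3
# No setter is defined, so the property is read-only:
# "handler.chunksize = 1" would raise AttributeError.

Because the value is computed on access rather than stored, nothing needs to happen in `__init__`, and each class has a single authoritative place to declare its chunk size.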
6 changes: 2 additions & 4 deletions yt/frontends/sdf/io.py
@@ -5,8 +5,6 @@
 from yt.utilities.io_handler import BaseIOHandler
 from yt.utilities.lib.geometry_utils import compute_morton
 
-CHUNKSIZE = 32 ** 3
-
 
 class IOHandlerSDF(BaseIOHandler):
     _dataset_type = "sdf_particles"
@@ -67,7 +65,7 @@ def _initialize_index(self, data_file, regions):
         morton = np.empty(pcount, dtype="uint64")
         ind = 0
         while ind < pcount:
-            npart = min(CHUNKSIZE, pcount - ind)
+            npart = min(self.ds.index.chunksize, pcount - ind)
             pos = np.empty((npart, 3), dtype=x.dtype)
             pos[:, 0] = x[ind : ind + npart]
             pos[:, 1] = y[ind : ind + npart]
@@ -80,7 +78,7 @@ def _initialize_index(self, data_file, regions):
             data_file.ds.domain_left_edge,
             data_file.ds.domain_right_edge,
         )
-        ind += CHUNKSIZE
+        ind += self.ds.index.chunksize
         return morton
 
     def _identify_fields(self, data_file):
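The SDF handler now reaches the value through `self.ds.index.chunksize` instead of a module-level constant. The surrounding loop is a plain chunked pass over `pcount` particles; here is the same traversal extracted into a standalone generator for illustration (the helper name `iter_chunks` is not part of yt):

def iter_chunks(pcount, chunksize):
    # Yield (start, stop) index pairs covering [0, pcount) in steps of
    # chunksize, mirroring the while-loop in _initialize_index.
    ind = 0
    while ind < pcount:
        npart = min(chunksize, pcount - ind)
        yield ind, ind + npart
        ind += chunksize


# Every particle is visited exactly once, whatever the chunk size:
pcount = 100_000
slices = list(iter_chunks(pcount, 32 ** 3))
assert sum(stop - start for start, stop in slices) == pcount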
23 changes: 16 additions & 7 deletions yt/frontends/tipsy/io.py
@@ -7,7 +7,6 @@
 
 from yt.frontends.sph.io import IOHandlerSPH
 from yt.frontends.tipsy.definitions import npart_mapping
-from yt.geometry.particle_geometry_handler import CHUNKSIZE
 from yt.utilities.lib.particle_kdtree_tools import generate_smoothing_length
 from yt.utilities.logger import ytLogger as mylog
 
@@ -20,7 +19,6 @@ class IOHandlerTipsyBinary(IOHandlerSPH):
     _aux_pdtypes = None  # auxiliary files' dtypes
 
     _ptypes = ("Gas", "DarkMatter", "Stars")
-    _chunksize = CHUNKSIZE
 
     _aux_fields = None
     _fields = (
@@ -92,6 +90,7 @@ def _read_particle_coords(self, chunks, ptf):
         for chunk in chunks:
             for obj in chunk.objs:
                 data_files.update(obj.data_files)
+        chunksize = self.ds.index.chunksize
         for data_file in sorted(data_files, key=lambda x: (x.filename, x.start)):
             poff = data_file.field_offsets
             tp = data_file.total_particles
@@ -102,7 +101,7 @@ def _read_particle_coords(self, chunks, ptf):
                     f.seek(poff[ptype])
                     total = 0
                     while total < tp[ptype]:
-                        count = min(self._chunksize, tp[ptype] - total)
+                        count = min(chunksize, tp[ptype] - total)
                         p = np.fromfile(f, self._pdtypes[ptype], count=count)
                         total += p.size
                         d = [p["Coordinates"][ax].astype("float64") for ax in "xyz"]
@@ -176,7 +175,7 @@ def _read_particle_fields(self, chunks, ptf, selector):
                     continue
                 f.seek(poff[ptype])
                 afields = list(set(field_list).intersection(self._aux_fields))
-                count = min(self._chunksize, tp[ptype])
+                count = min(self.ds.index.chunksize, tp[ptype])
                 p = np.fromfile(f, self._pdtypes[ptype], count=count)
                 auxdata = []
                 for afield in afields:
@@ -233,6 +232,16 @@ def _update_domain(self, data_file):
         """
         ds = data_file.ds
         ind = 0
+        # NOTE:
+        # We hardcode this value here because otherwise we get into a
+        # situation where we require the existence of index before we
+        # can successfully instantiate it, or where we are calling it
+        # from within its instantiation.
+        #
+        # Because this value is not propagated later on, and does not
+        # impact the construction of the bitmap indices, it should be
+        # acceptable to just use a reasonable number here.
+        chunksize = 64 ** 3
         # Check to make sure that the domain hasn't already been set
         # by the parameter file
         if np.all(np.isfinite(ds.domain_left_edge)) and np.all(
@@ -252,7 +261,7 @@ def _update_domain(self, data_file):
                     continue
                 stop = ind + count
                 while ind < stop:
-                    c = min(chunksize, stop - ind)
+                    c = min(chunksize, stop - ind)
                     pp = np.fromfile(f, dtype=self._pdtypes[ptype], count=c)
                     np.minimum(
                         mi,
@@ -467,10 +476,10 @@ def _calculate_particle_offsets_aux(self, data_file):
             for i, ptype in enumerate(self._ptypes):
                 if data_file.total_particles[ptype] == 0:
                     continue
-                elif params[npart_mapping[ptype]] > CHUNKSIZE:
+                elif params[npart_mapping[ptype]] > self.ds.index.chunksize:
                     for j in range(i):
                         npart = params[npart_mapping[self._ptypes[j]]]
-                        if npart > CHUNKSIZE:
+                        if npart > self.ds.index.chunksize:
                             pos += npart * size
                 pos += data_file.start * size
                 aux_fields_offsets[afield][ptype] = pos
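The hardcoded `chunksize = 64 ** 3` in `_update_domain` is the one place that deliberately does not use the new property: that method runs while the index is still being built, so consulting `ds.index.chunksize` there would re-enter index construction. A toy reproduction of the cycle, under assumed names (this is not yt's actual class structure):

class Index:
    chunksize = 64 ** 3

    def __init__(self, ds):
        self.ds = ds
        # File scanning happens during construction. If it asked for
        # ds.index.chunksize here, ds._index would still be None (it is
        # assigned only after __init__ returns), so Dataset.index would
        # start building a second Index, and so on without bound.
        # Hardcoding a local value in that path breaks the cycle.


class Dataset:
    _index = None

    @property
    def index(self):
        # Lazily build the index on first access.
        if self._index is None:
            self._index = Index(self)
        return self._index

As the diff's NOTE says, the hardcoded value only controls how many particles each np.fromfile call reads while scanning for the domain bounds, and never feeds into the bitmap indices, so any reasonable number is safe there.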
16 changes: 12 additions & 4 deletions yt/geometry/particle_geometry_handler.py
@@ -13,8 +13,6 @@
 from yt.utilities.lib.fnv_hash import fnv_hash
 from yt.utilities.logger import ytLogger as mylog
 
-CHUNKSIZE = 64 ** 3
-
 
 class ParticleIndex(Index):
     """The Index subclass for particle datasets"""
@@ -45,6 +43,11 @@ def _get_particle_type_counts(self):
     def convert(self, unit):
         return self.dataset.conversion_factors[unit]
 
+    @property
+    def chunksize(self):
+        # This can be overridden in subclasses
+        return 64 ** 3
+
     _data_files = None
 
     @property
@@ -79,15 +82,20 @@ def _setup_filenames(self):
         fi = 0
         for i in range(int(ndoms)):
             start = 0
-            end = start + CHUNKSIZE
+            if self.chunksize > 0:
+                end = start + self.chunksize
+            else:
+                end = None
             while True:
                 df = cls(self.dataset, self.io, template % {"num": i}, fi, (start, end))
                 if max(df.total_particles.values()) == 0:
                     break
                 fi += 1
                 self.data_files.append(df)
+                if self.chunksize <= 0:
+                    break
                 start = end
-                end += CHUNKSIZE
+                end += self.chunksize
 
     def _initialize_index(self):
         ds = self.dataset
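The new branches in `_setup_filenames` give the property a second role: a frontend can return a non-positive `chunksize` to disable sub-file chunking, in which case each file yields a single data file spanning `(0, None)`. A hypothetical subclass using that escape hatch (not part of this PR's diff):

from yt.geometry.particle_geometry_handler import ParticleIndex


class UnchunkedParticleIndex(ParticleIndex):
    @property
    def chunksize(self):
        # Non-positive means "do not split files": _setup_filenames then
        # builds one data file with (start, end) = (0, None) and breaks
        # out of the while-loop after appending it.
        return 0

An `end` of None can then stand for "to the end of the file," matching Python and NumPy slice semantics, where arr[0:None] selects everything.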