From c3ae3e0eccd7f8ee25f7123308e57481399dcb34 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Mon, 4 May 2020 12:24:14 -0500 Subject: [PATCH 01/42] First sketch of a particle selection tester --- yt/testing.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/yt/testing.py b/yt/testing.py index 94abdceda26..6b09f0a60f4 100644 --- a/yt/testing.py +++ b/yt/testing.py @@ -1223,3 +1223,32 @@ def setUp(self): def tearDown(self): os.chdir(self.curdir) shutil.rmtree(self.tmpdir) + +# We make this a class with a setup so we can cache the particles one time +class ParticleSelectionComparison: + + def __init__(self, ds): + self.ds = ds + # Construct an index so that we get all the data_files + ds.index + particles = {} + for data_file in ds.index.data_files: + for ptype, pos_arr in ds.index.io._yield_coordinates(data_file): + particles.setdefault(ptype, []).append(pos_arr) + for ptype in particles: + particles[ptype] = np.concatenate(particles[ptype]) + self.particles = particles + + def compare_dobj_selection(self, dobj): + for ptype in sorted(self.particles): + x, y, z = self.particles[ptype].T + # Set our radii to zero for now, I guess? 
+ sel_index = dobj.selector.select_points(x, y, z, 0.0) + sel_pos = self.particles[ptype][sel_index, :] + + obj_results = [] + for chunk in dobj.chunks([], "io"): + obj_results.append(chunk[ptype, "particle_position"]) + obj_results = np.concatenate(obj_results, axis = 0) + + assert np.all(sel_pos == obj_results) From 0299776e2bc64d547fcac2604d3ec0230fda2fd6 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Mon, 4 May 2020 17:58:09 -0500 Subject: [PATCH 02/42] Add in the smoothing length calculations --- yt/testing.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/yt/testing.py b/yt/testing.py index 6b09f0a60f4..60b8e7f7bc1 100644 --- a/yt/testing.py +++ b/yt/testing.py @@ -1232,18 +1232,26 @@ def __init__(self, ds): # Construct an index so that we get all the data_files ds.index particles = {} + hsml = {} for data_file in ds.index.data_files: for ptype, pos_arr in ds.index.io._yield_coordinates(data_file): particles.setdefault(ptype, []).append(pos_arr) + if ptype in getattr(ds, '_sph_ptypes', ()): + hsml.setdefault(ptype, []).append(ds.index.io._get_smoothing_length( + data_file, pos_arr.dtype, pos_arr.shape)) for ptype in particles: particles[ptype] = np.concatenate(particles[ptype]) + if ptype in hsml: + hsml[ptype] = np.concatenate(hsml[ptype]) self.particles = particles + self.hsml = hsml def compare_dobj_selection(self, dobj): for ptype in sorted(self.particles): x, y, z = self.particles[ptype].T # Set our radii to zero for now, I guess? - sel_index = dobj.selector.select_points(x, y, z, 0.0) + radii = self.hsml.get(ptype, 0.0) + sel_index = dobj.selector.select_points(x, y, z, radii) sel_pos = self.particles[ptype][sel_index, :] obj_results = [] From 073fbf9ed1b8ed765df787d4390e25293815e41f Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Mon, 4 May 2020 17:58:45 -0500 Subject: [PATCH 03/42] Fix particle selection for sub-regions Thanks to Meagan Lang for debugging this with me. 
We discovered there was a corner case for filling subregions of mi1 and mi2, which showed up in #2574. This corrects that by choosing the region correctly. --- yt/geometry/particle_oct_container.pyx | 39 +++++++++++++++----------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index 9aeec58dae5..f95369fe2dc 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -1532,11 +1532,11 @@ cdef class ParticleBitmapSelector: @cython.cdivision(True) @cython.initializedcheck(False) cdef void add_coarse(self, np.uint64_t mi1, int bbox = 2): - cdef bint flag_ref = self.is_refined(mi1) + cdef bint flag_ref self.coarse_select_bool[mi1] = 1 # Neighbors - if (self.ngz > 0) and (flag_ref == 0): - if (bbox == 2): + if (self.ngz > 0) and (bbox == 2): + if self.is_refined(mi1): self.add_neighbors_coarse(mi1) @cython.boundscheck(False) @@ -1563,9 +1563,8 @@ cdef class ParticleBitmapSelector: cdef int add_refined(self, np.uint64_t mi1, np.uint64_t mi2, int bbox = 2) except -1: self.refined_select_bool[mi2] = 1 # Neighbors - if (self.ngz > 0): - if (bbox == 2): - self.add_neighbors_refined(mi1, mi2) + if (self.ngz > 0) and (bbox == 2): + self.add_neighbors_refined(mi1, mi2) @cython.boundscheck(False) @cython.wraparound(False) @@ -1752,11 +1751,14 @@ cdef class ParticleBitmapSelector: np.uint64_t ind1[3]) except -1: cdef np.uint64_t imi, fmi cdef np.uint64_t mi - cdef np.uint64_t indexgap = 1 << (self.bitmap.index_order1 - nlevel) - imi = encode_morton_64bit(ind1[0], ind1[1], ind1[2]) - fmi = encode_morton_64bit( - ind1[0]+indexgap-1, ind1[1]+indexgap-1, ind1[2]+indexgap-1) - for mi in range(imi, fmi+1): + cdef np.uint64_t start_ind[3], end_ind[3] + cdef np.uint64_t shift_by = (self.bitmap.index_order1 - nlevel) + for i in range(3): + start_ind[i] = ind1[i] << shift_by + end_ind[i] = start_ind[i] + (1 << shift_by) - 1 + imi = 
encode_morton_64bit(start_ind[0], start_ind[1], start_ind[2]) + fmi = encode_morton_64bit(end_ind[0], end_ind[1], end_ind[2]) + for mi in range(imi, fmi): self.add_coarse(mi, 1) @cython.boundscheck(False) @@ -1767,12 +1769,15 @@ cdef class ParticleBitmapSelector: np.uint64_t mi1, np.uint64_t ind2[3]) except -1: cdef np.uint64_t imi, fmi - cdef np.uint64_t indexgap = 1 << ( - self.bitmap.index_order2 - (nlevel - self.bitmap.index_order1)) - imi = encode_morton_64bit(ind2[0], ind2[1], ind2[2]) - fmi = encode_morton_64bit( - ind2[0]+indexgap-1, ind2[1]+indexgap-1, ind2[2]+indexgap-1) - for mi2 in range(imi, fmi+1): + cdef np.uint64_t shift_by = (self.bitmap.index_order2 + + self.bitmap.index_order1) - nlevel + cdef np.uint64_t start_ind[3], end_ind[3] + for i in range(3): + start_ind[i] = ind2[i] << shift_by + end_ind[i] = start_ind[i] + (1 << shift_by) - 1 + imi = encode_morton_64bit(start_ind[0], start_ind[1], start_ind[2]) + fmi = encode_morton_64bit(end_ind[0], end_ind[1], end_ind[2]) + for mi2 in range(imi, fmi + 1): self.add_refined(mi1, mi2, 1) @cython.boundscheck(False) From f3dd24974168cb2c8215f9a5dc4ddf299d20485a Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Tue, 5 May 2020 09:38:13 -0500 Subject: [PATCH 04/42] Had the logic for is_refined backwards --- yt/geometry/particle_oct_container.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index f95369fe2dc..63b50e4f8e8 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -1536,7 +1536,7 @@ cdef class ParticleBitmapSelector: self.coarse_select_bool[mi1] = 1 # Neighbors if (self.ngz > 0) and (bbox == 2): - if self.is_refined(mi1): + if not self.is_refined(mi1): self.add_neighbors_coarse(mi1) @cython.boundscheck(False) From 7a997036d3805d0405c27fa700a5811bd36aea4f Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Tue, 5 May 2020 11:30:39 -0500 Subject: [PATCH 
05/42] Updating tests to use particle selection comparison --- yt/frontends/gadget/tests/test_outputs.py | 44 ++++++++++++++++++++++- yt/testing.py | 9 +++-- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/yt/frontends/gadget/tests/test_outputs.py b/yt/frontends/gadget/tests/test_outputs.py index b952bd2c4f9..1f14eb50fd4 100644 --- a/yt/frontends/gadget/tests/test_outputs.py +++ b/yt/frontends/gadget/tests/test_outputs.py @@ -5,7 +5,9 @@ import tempfile import yt -from yt.testing import requires_file +from yt.testing import requires_file, \ + ParticleSelectionComparison, \ + assert_equal from yt.utilities.answer_testing.framework import \ data_dir_load, \ requires_ds, \ @@ -107,6 +109,46 @@ def test_multifile_read(): assert isinstance(data_dir_load(snap_33), GadgetDataset) assert isinstance(data_dir_load(snap_33_dir), GadgetDataset) +@requires_file(snap_33) +def test_particle_subselection(): + """ + This checks that we correctly subselect from a dataset, first by making + sure we get all the particles, then by comparing manual selections against + them. 
+ """ + ds = data_dir_load(snap_33) + psc = ParticleSelectionComparison(ds) + + sp1 = ds.sphere("c", (0.1, "unitary")) + assert_equal(psc.compare_dobj_selection(sp1) , True) + + sp2 = ds.sphere("c", (0.1, "unitary")) + assert_equal(psc.compare_dobj_selection(sp2) , True) + + sp3 = ds.sphere((1.0, 1.0, 1.0), (0.05, "unitary")) + assert_equal(psc.compare_dobj_selection(sp3) , True) + + sp4 = ds.sphere("c", (0.5, "unitary")) + assert_equal(psc.compare_dobj_selection(sp4) , True) + + dd = ds.all_data() + assert_equal(psc.compare_dobj_selection(dd) , True) + + reg1 = ds.r[ (0.1, 'unitary'):(0.9, 'unitary'), + (0.1, 'unitary'):(0.9, 'unitary'), + (0.1, 'unitary'):(0.9, 'unitary')] + assert_equal(psc.compare_dobj_selection(reg1) , True) + + reg2 = ds.r[ (0.8, 'unitary'):(0.85, 'unitary'), + (0.8, 'unitary'):(0.85, 'unitary'), + (0.8, 'unitary'):(0.85, 'unitary')] + assert_equal(psc.compare_dobj_selection(reg2) , True) + + reg3 = ds.r[ (0.3, 'unitary'):(0.6, 'unitary'), + (0.2, 'unitary'):(0.8, 'unitary'), + (0.0, 'unitary'):(0.1, 'unitary')] + assert_equal(psc.compare_dobj_selection(reg3) , True) + @requires_ds(BE_Gadget) def test_bigendian_field_access(): ds = data_dir_load(BE_Gadget) diff --git a/yt/testing.py b/yt/testing.py index 60b8e7f7bc1..7d2bd098dc7 100644 --- a/yt/testing.py +++ b/yt/testing.py @@ -1224,8 +1224,13 @@ def tearDown(self): os.chdir(self.curdir) shutil.rmtree(self.tmpdir) -# We make this a class with a setup so we can cache the particles one time class ParticleSelectionComparison: + """ + This is a test helper class that takes a particle dataset, caches the + particles it has on disk (manually reading them using lower-level IO + routines) and then received a data object that it compares against manually + running the data object's selection routines. 
+ """ def __init__(self, ds): self.ds = ds @@ -1259,4 +1264,4 @@ def compare_dobj_selection(self, dobj): obj_results.append(chunk[ptype, "particle_position"]) obj_results = np.concatenate(obj_results, axis = 0) - assert np.all(sel_pos == obj_results) + return np.all(sel_pos == obj_results) From df4cf91acdfd114bc351b964e5891cb9621245bf Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Tue, 5 May 2020 12:24:36 -0500 Subject: [PATCH 06/42] Update yt/geometry/particle_oct_container.pyx Co-authored-by: Meagan Lang --- yt/geometry/particle_oct_container.pyx | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index 63b50e4f8e8..9b6e649b9e5 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -1769,15 +1769,11 @@ cdef class ParticleBitmapSelector: np.uint64_t mi1, np.uint64_t ind2[3]) except -1: cdef np.uint64_t imi, fmi - cdef np.uint64_t shift_by = (self.bitmap.index_order2 + - self.bitmap.index_order1) - nlevel - cdef np.uint64_t start_ind[3], end_ind[3] - for i in range(3): - start_ind[i] = ind2[i] << shift_by - end_ind[i] = start_ind[i] + (1 << shift_by) - 1 - imi = encode_morton_64bit(start_ind[0], start_ind[1], start_ind[2]) - fmi = encode_morton_64bit(end_ind[0], end_ind[1], end_ind[2]) - for mi2 in range(imi, fmi + 1): + cdef np.uint64_t shift_by = 3 * ((self.bitmap.index_order2 + + self.bitmap.index_order1) - nlevel) + imi = encode_morton_64bit(ind2[0], ind2[1], ind2[2]) << shift_by + fmi = imi + (1 << shift_by) + for mi2 in range(imi, fmi): self.add_refined(mi1, mi2, 1) @cython.boundscheck(False) From 878e01816ecfeb8bdce43abe0b121a026fb63463 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Tue, 5 May 2020 12:24:44 -0500 Subject: [PATCH 07/42] Update yt/geometry/particle_oct_container.pyx Co-authored-by: Meagan Lang --- yt/geometry/particle_oct_container.pyx | 10 +++------- 1 file changed, 3 insertions(+), 7 
deletions(-) diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index 9b6e649b9e5..a66095b7ac8 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -1751,13 +1751,9 @@ cdef class ParticleBitmapSelector: np.uint64_t ind1[3]) except -1: cdef np.uint64_t imi, fmi cdef np.uint64_t mi - cdef np.uint64_t start_ind[3], end_ind[3] - cdef np.uint64_t shift_by = (self.bitmap.index_order1 - nlevel) - for i in range(3): - start_ind[i] = ind1[i] << shift_by - end_ind[i] = start_ind[i] + (1 << shift_by) - 1 - imi = encode_morton_64bit(start_ind[0], start_ind[1], start_ind[2]) - fmi = encode_morton_64bit(end_ind[0], end_ind[1], end_ind[2]) + cdef np.uint64_t shift_by = 3 * (self.bitmap.index_order1 - nlevel) + imi = encode_morton_64bit(ind1[0], ind1[1], ind1[2]) << shift_by + fmi = imi + (1 << shift_by) for mi in range(imi, fmi): self.add_coarse(mi, 1) From cc93f1949d93fa4015b2063d1a6af05e96ca11bd Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Tue, 5 May 2020 12:25:06 -0500 Subject: [PATCH 08/42] Update yt/geometry/particle_oct_container.pyx Co-authored-by: Meagan Lang --- yt/geometry/particle_oct_container.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index a66095b7ac8..aab682f48a4 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -1532,7 +1532,6 @@ cdef class ParticleBitmapSelector: @cython.cdivision(True) @cython.initializedcheck(False) cdef void add_coarse(self, np.uint64_t mi1, int bbox = 2): - cdef bint flag_ref self.coarse_select_bool[mi1] = 1 # Neighbors if (self.ngz > 0) and (bbox == 2): From a8ef879e4f28a353d9e4f8fc65791cbe9bbf3da6 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Tue, 5 May 2020 13:39:17 -0500 Subject: [PATCH 09/42] Updating from comments --- yt/frontends/gadget/tests/test_outputs.py | 2 +- yt/testing.py | 4 +++- 2 files 
changed, 4 insertions(+), 2 deletions(-) diff --git a/yt/frontends/gadget/tests/test_outputs.py b/yt/frontends/gadget/tests/test_outputs.py index 1f14eb50fd4..c836e8505ce 100644 --- a/yt/frontends/gadget/tests/test_outputs.py +++ b/yt/frontends/gadget/tests/test_outputs.py @@ -122,7 +122,7 @@ def test_particle_subselection(): sp1 = ds.sphere("c", (0.1, "unitary")) assert_equal(psc.compare_dobj_selection(sp1) , True) - sp2 = ds.sphere("c", (0.1, "unitary")) + sp2 = ds.sphere("c", (0.2, "unitary")) assert_equal(psc.compare_dobj_selection(sp2) , True) sp3 = ds.sphere((1.0, 1.0, 1.0), (0.05, "unitary")) diff --git a/yt/testing.py b/yt/testing.py index 7d2bd098dc7..4ecdc693b38 100644 --- a/yt/testing.py +++ b/yt/testing.py @@ -1229,7 +1229,8 @@ class ParticleSelectionComparison: This is a test helper class that takes a particle dataset, caches the particles it has on disk (manually reading them using lower-level IO routines) and then received a data object that it compares against manually - running the data object's selection routines. + running the data object's selection routines. All supplied data objects + must be created from the input dataset. 
""" def __init__(self, ds): @@ -1237,6 +1238,7 @@ def __init__(self, ds): # Construct an index so that we get all the data_files ds.index particles = {} + # hsml is the smoothing length we use for radial selection hsml = {} for data_file in ds.index.data_files: for ptype, pos_arr in ds.index.io._yield_coordinates(data_file): From d22947ec058647e1722120a30493a9dc8603c7f5 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Thu, 7 May 2020 09:51:01 -0500 Subject: [PATCH 10/42] Refine tests a bit --- yt/frontends/gadget/tests/test_outputs.py | 41 ++++++++++++++--------- yt/testing.py | 8 +++-- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/yt/frontends/gadget/tests/test_outputs.py b/yt/frontends/gadget/tests/test_outputs.py index c836e8505ce..168dd10c1fe 100644 --- a/yt/frontends/gadget/tests/test_outputs.py +++ b/yt/frontends/gadget/tests/test_outputs.py @@ -111,43 +111,54 @@ def test_multifile_read(): @requires_file(snap_33) def test_particle_subselection(): - """ - This checks that we correctly subselect from a dataset, first by making - sure we get all the particles, then by comparing manual selections against - them. - """ + #This checks that we correctly subselect from a dataset, first by making + #sure we get all the particles, then by comparing manual selections against + #them. 
ds = data_dir_load(snap_33) psc = ParticleSelectionComparison(ds) - + sp1 = ds.sphere("c", (0.1, "unitary")) - assert_equal(psc.compare_dobj_selection(sp1) , True) + psc.compare_dobj_selection(sp1) sp2 = ds.sphere("c", (0.2, "unitary")) - assert_equal(psc.compare_dobj_selection(sp2) , True) + psc.compare_dobj_selection(sp2) + + # Test wrapping around each axis individually: x + sp3_x = ds.sphere((1.0, 12.5, 12.5), (2.0, "code_length")) + psc.compare_dobj_selection(sp3_x) + + # Test wrapping around each axis individually: y + sp3_y = ds.sphere((12.5, 1.0, 12.5), (2.0, "code_length")) + psc.compare_dobj_selection(sp3_y) + + # Test wrapping around each axis individually: z + sp3_z = ds.sphere((12.5, 12.5, 1.0), (2.0, "code_length")) + psc.compare_dobj_selection(sp3_z) - sp3 = ds.sphere((1.0, 1.0, 1.0), (0.05, "unitary")) - assert_equal(psc.compare_dobj_selection(sp3) , True) + # Test wrapping around all three axes simultaneously + sp3_all = ds.sphere((1.0, 1.0, 1.0), (2.0, "code_length")) + psc.compare_dobj_selection(sp3_all) sp4 = ds.sphere("c", (0.5, "unitary")) - assert_equal(psc.compare_dobj_selection(sp4) , True) + psc.compare_dobj_selection(sp4) dd = ds.all_data() - assert_equal(psc.compare_dobj_selection(dd) , True) + psc.compare_dobj_selection(dd) reg1 = ds.r[ (0.1, 'unitary'):(0.9, 'unitary'), (0.1, 'unitary'):(0.9, 'unitary'), (0.1, 'unitary'):(0.9, 'unitary')] - assert_equal(psc.compare_dobj_selection(reg1) , True) + psc.compare_dobj_selection(reg1) reg2 = ds.r[ (0.8, 'unitary'):(0.85, 'unitary'), (0.8, 'unitary'):(0.85, 'unitary'), (0.8, 'unitary'):(0.85, 'unitary')] - assert_equal(psc.compare_dobj_selection(reg2) , True) + psc.compare_dobj_selection(reg2) reg3 = ds.r[ (0.3, 'unitary'):(0.6, 'unitary'), (0.2, 'unitary'):(0.8, 'unitary'), (0.0, 'unitary'):(0.1, 'unitary')] - assert_equal(psc.compare_dobj_selection(reg3) , True) + psc.compare_dobj_selection(reg3) @requires_ds(BE_Gadget) def test_bigendian_field_access(): diff --git a/yt/testing.py 
b/yt/testing.py index 4ecdc693b38..d2d9d1bc286 100644 --- a/yt/testing.py +++ b/yt/testing.py @@ -1259,11 +1259,13 @@ def compare_dobj_selection(self, dobj): # Set our radii to zero for now, I guess? radii = self.hsml.get(ptype, 0.0) sel_index = dobj.selector.select_points(x, y, z, radii) - sel_pos = self.particles[ptype][sel_index, :] + if sel_index is None: + sel_pos = np.empty((0, 3)) + else: + sel_pos = self.particles[ptype][sel_index, :] obj_results = [] for chunk in dobj.chunks([], "io"): obj_results.append(chunk[ptype, "particle_position"]) obj_results = np.concatenate(obj_results, axis = 0) - - return np.all(sel_pos == obj_results) + assert_equal(sel_pos, obj_results) From 934a4ebd077941f73e038dba9363d20f4cde7dfe Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Thu, 7 May 2020 10:42:00 -0500 Subject: [PATCH 11/42] Add tests for wrapping on right --- yt/frontends/gadget/tests/test_outputs.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/yt/frontends/gadget/tests/test_outputs.py b/yt/frontends/gadget/tests/test_outputs.py index 168dd10c1fe..d7fd3fbdfc4 100644 --- a/yt/frontends/gadget/tests/test_outputs.py +++ b/yt/frontends/gadget/tests/test_outputs.py @@ -135,12 +135,28 @@ def test_particle_subselection(): sp3_z = ds.sphere((12.5, 12.5, 1.0), (2.0, "code_length")) psc.compare_dobj_selection(sp3_z) - # Test wrapping around all three axes simultaneously + # Test wrapping around all three axes simultaneously on left sp3_all = ds.sphere((1.0, 1.0, 1.0), (2.0, "code_length")) psc.compare_dobj_selection(sp3_all) - sp4 = ds.sphere("c", (0.5, "unitary")) - psc.compare_dobj_selection(sp4) + # Test wrapping around each axis individually on right: x + sp4_x = ds.sphere((24.0, 12.5, 12.5), (2.0, "code_length")) + psc.compare_dobj_selection(sp4_x) + + # Test wrapping around each axis individually on right: y + sp4_y = ds.sphere((12.5, 24.0, 12.5), (2.0, "code_length")) + psc.compare_dobj_selection(sp4_y) + + # Test wrapping 
around each axis individually on right: z + sp4_z = ds.sphere((12.5, 12.5, 24.0), (2.0, "code_length")) + psc.compare_dobj_selection(sp4_z) + + # Test wrapping around all three axes simultaneously on right + sp4_all = ds.sphere((24.0, 24.0, 24.0), (2.0, "code_length")) + psc.compare_dobj_selection(sp4_all) + + sp5 = ds.sphere("c", (0.5, "unitary")) + psc.compare_dobj_selection(sp5) dd = ds.all_data() psc.compare_dobj_selection(dd) From 402afa0a1989db239575ef9a669069558420f540 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Thu, 7 May 2020 11:54:31 -0500 Subject: [PATCH 12/42] Rework periodic smoothing length calculations --- yt/geometry/particle_oct_container.pyx | 126 +++++++++---------------- 1 file changed, 42 insertions(+), 84 deletions(-) diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index aab682f48a4..99752821470 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -484,22 +484,19 @@ cdef class ParticleBitmap: cdef np.uint64_t mi, miex, mi_max cdef np.uint64_t mi_split[3] cdef np.float64_t ppos[3] - cdef int skip, Nex - cdef int Nex_min[3] - cdef int Nex_max[3] - cdef np.float64_t rpos_min, rpos_max - cdef np.uint64_t xex_min, xex_max, yex_min, yex_max, zex_min, zex_max + cdef np.float64_t s_ppos[3] # shifted ppos + cdef int skip + cdef np.uint64_t bounds[3][2] cdef np.uint64_t xex, yex, zex - cdef int ix, iy, iz, ixe, iye, ize - cdef np.ndarray[np.uint64_t, ndim=1] xex_range = np.empty(7, 'uint64') - cdef np.ndarray[np.uint64_t, ndim=1] yex_range = np.empty(7, 'uint64') - cdef np.ndarray[np.uint64_t, ndim=1] zex_range = np.empty(7, 'uint64') cdef np.float64_t LE[3] cdef np.float64_t RE[3] + cdef np.float64_t DW[3] cdef np.uint8_t PER[3] cdef np.float64_t dds[3] cdef np.uint8_t[:] mask = self.masks[:, file_id] cdef np.int64_t msize = (1 << (self.index_order1 * 3)) + cdef int axiter[3][2] + cdef np.float64_t axiterv[3][2] mi_max = (1 << self.index_order1) - 1 # Copy 
over things for this file (type cast necessary?) for i in range(3): @@ -507,10 +504,14 @@ cdef class ParticleBitmap: RE[i] = self.right_edge[i] PER[i] = self.periodicity[i] dds[i] = self.dds_mi1[i] + DW[i] = RE[i] - LE[i] + axiter[i][0] = 0 # We always do an offset of 0 + axiterv[i][0] = 0.0 # Mark index of particles that are in this file for p in range(pos.shape[0]): skip = 0 for i in range(3): + axiter[i][1] = 999 # Skip particles outside the domain if pos[p,i] >= RE[i] or pos[p,i] < LE[i]: skip = 1 @@ -526,82 +527,39 @@ cdef class ParticleBitmap: raise RuntimeError( "Smoothing length for particle %s is negative with " "value \"%s\"" % p, hsml[p]) - Nex = 1 + # We first check if we're bounded within the domain; this follows the logic in the + # pixelize_cartesian routine. We assume that no smoothing + # length can wrap around both directions. for i in range(3): - Nex_min[i] = 0 - Nex_max[i] = 0 - rpos_min = ppos[i] - (dds[i]*mi_split[i] + LE[i]) - rpos_max = dds[i] - rpos_min - if rpos_min > hsml[p]: - Nex_min[i] = ((rpos_min-hsml[p])/dds[i]) + 1 - if rpos_max > hsml[p]: - Nex_max[i] = ((rpos_max-hsml[p])/dds[i]) + 1 - Nex *= (Nex_max[i] + Nex_min[i] + 1) - if Nex > 1: - # Ensure that min/max values for x,y,z indexes are obeyed - if (Nex_max[0] + Nex_min[0] + 1) > xex_range.shape[0]: - xex_range = np.empty(Nex_max[0] + Nex_min[0] + 1, 'uint64') - if (Nex_max[1] + Nex_min[1] + 1) > yex_range.shape[0]: - yex_range = np.empty(Nex_max[1] + Nex_min[1] + 1, 'uint64') - if (Nex_max[2] + Nex_min[2] + 1) > zex_range.shape[0]: - zex_range = np.empty(Nex_max[2] + Nex_min[2] + 1, 'uint64') - xex_min = mi_split[0] - min(Nex_min[0], mi_split[0]) - xex_max = mi_split[0] + min(Nex_max[0], (mi_max - mi_split[0])) + 1 - yex_min = mi_split[1] - min(Nex_min[1], mi_split[1]) - yex_max = mi_split[1] + min(Nex_max[1], (mi_max - mi_split[1])) + 1 - zex_min = mi_split[2] - min(Nex_min[2], mi_split[2]) - zex_max = mi_split[2] + min(Nex_max[2], (mi_max - mi_split[2])) + 1 - ixe = iye = ize 
= 0 - for xex in range(xex_min, xex_max): - xex_range[ixe] = xex - ixe += 1 - for yex in range(yex_min, yex_max): - yex_range[iye] = yex - iye += 1 - for zex in range(zex_min, zex_max): - zex_range[ize] = zex - ize += 1 - # Add periodic wrapping - if PER[0]: - if Nex_min[0] > mi_split[0]: - for xex in range(mi_max + 1 - (Nex_min[0] - mi_split[0]), mi_max + 1): - xex_range[ixe] = xex - ixe += 1 - if Nex_max[0] > (mi_max-mi_split[0]): - for xex in range(0, Nex_max[0] - (mi_max-mi_split[0])): - xex_range[ixe] = xex - ixe += 1 - if PER[1]: - if Nex_min[1] > mi_split[1]: - for yex in range(mi_max + 1 - (Nex_min[1] - mi_split[1]), mi_max + 1): - yex_range[iye] = yex - iye += 1 - if Nex_max[1] > (mi_max-mi_split[1]): - for yex in range(0, Nex_max[1] - (mi_max-mi_split[1])): - yex_range[iye] = yex - iye += 1 - if PER[2]: - if Nex_min[2] > mi_split[2]: - for zex in range(mi_max + 1 - (Nex_min[2] - mi_split[2]), mi_max + 1): - zex_range[ize] = zex - ize += 1 - if Nex_max[2] > (mi_max-mi_split[2]): - for zex in range(0, Nex_max[2] - (mi_max-mi_split[2])): - zex_range[ize] = zex - ize += 1 - for ix in range(ixe): - xex = xex_range[ix] - for iy in range(iye): - yex = yex_range[iy] - for iz in range(ize): - zex = zex_range[iz] - miex = encode_morton_64bit(xex, yex, zex) - if miex >= msize: - raise IndexError( - "Index for a softening region " + - "({}) exceeds ".format(miex) + - "max ({})".format(msize)) - mask[miex] = 1 + if PER[i] and ppos[i] - hsml[p] < LE[i]: + axiter[i][1] = +1 + axiterv[i][1] = DW[i] + elif PER[i] and ppos[i] + hsml[p] > RE[i]: + axiter[i][1] = -1 + axiterv[i][1] = -DW[i] + for xi in range(2): + if axiter[0][xi] == 999: continue + s_ppos[0] = ppos[0] + axiterv[0][xi] + for yi in range(2): + if axiter[1][yi] == 999: continue + s_ppos[1] = ppos[1] + axiterv[1][yi] + for zi in range(2): + if axiter[2][zi] == 999: continue + s_ppos[2] = ppos[2] + axiterv[2][zi] + # OK, now we compute the left and right edges for this shift. 
+ for i in range(3): + bounds[i][0] = i64max(((s_ppos[i] - LE[i] - hsml[p])/dds[i]), 0) + bounds[i][1] = i64min(((s_ppos[i] - LE[i] + hsml[p])/dds[i]), mi_max) + for xex in range(bounds[0][0], bounds[0][1]): + for yex in range(bounds[1][0], bounds[1][1]): + for zex in range(bounds[2][0], bounds[2][1]): + miex = encode_morton_64bit(xex, yex, zex) + mask[miex] = 1 + if miex >= msize: + raise IndexError( + "Index for a softening region " + + "({}) exceeds ".format(miex) + + "max ({})".format(msize)) @cython.boundscheck(False) @cython.wraparound(False) From 2e39ef15ff207a87c689d8cccb003f10d3374068 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Thu, 7 May 2020 11:54:31 -0500 Subject: [PATCH 13/42] Rework periodic smoothing length calculations --- yt/geometry/particle_oct_container.pyx | 141 +++++++++---------------- 1 file changed, 51 insertions(+), 90 deletions(-) diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index aab682f48a4..7b766121bfe 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -484,22 +484,20 @@ cdef class ParticleBitmap: cdef np.uint64_t mi, miex, mi_max cdef np.uint64_t mi_split[3] cdef np.float64_t ppos[3] - cdef int skip, Nex - cdef int Nex_min[3] - cdef int Nex_max[3] - cdef np.float64_t rpos_min, rpos_max - cdef np.uint64_t xex_min, xex_max, yex_min, yex_max, zex_min, zex_max + cdef np.float64_t s_ppos[3] # shifted ppos + cdef int skip + cdef np.uint64_t bounds[3][2] cdef np.uint64_t xex, yex, zex - cdef int ix, iy, iz, ixe, iye, ize - cdef np.ndarray[np.uint64_t, ndim=1] xex_range = np.empty(7, 'uint64') - cdef np.ndarray[np.uint64_t, ndim=1] yex_range = np.empty(7, 'uint64') - cdef np.ndarray[np.uint64_t, ndim=1] zex_range = np.empty(7, 'uint64') cdef np.float64_t LE[3] cdef np.float64_t RE[3] + cdef np.float64_t DW[3] cdef np.uint8_t PER[3] cdef np.float64_t dds[3] + cdef np.float64_t radius cdef np.uint8_t[:] mask = self.masks[:, file_id] cdef 
np.int64_t msize = (1 << (self.index_order1 * 3)) + cdef int axiter[3][2] + cdef np.float64_t axiterv[3][2] mi_max = (1 << self.index_order1) - 1 # Copy over things for this file (type cast necessary?) for i in range(3): @@ -507,10 +505,14 @@ cdef class ParticleBitmap: RE[i] = self.right_edge[i] PER[i] = self.periodicity[i] dds[i] = self.dds_mi1[i] + DW[i] = RE[i] - LE[i] + axiter[i][0] = 0 # We always do an offset of 0 + axiterv[i][0] = 0.0 # Mark index of particles that are in this file for p in range(pos.shape[0]): skip = 0 for i in range(3): + axiter[i][1] = 999 # Skip particles outside the domain if pos[p,i] >= RE[i] or pos[p,i] < LE[i]: skip = 1 @@ -521,87 +523,46 @@ cdef class ParticleBitmap: dds, mi_split) mask[mi] = 1 # Expand mask by softening - if hsml is not None: - if hsml[p] < 0: - raise RuntimeError( - "Smoothing length for particle %s is negative with " - "value \"%s\"" % p, hsml[p]) - Nex = 1 - for i in range(3): - Nex_min[i] = 0 - Nex_max[i] = 0 - rpos_min = ppos[i] - (dds[i]*mi_split[i] + LE[i]) - rpos_max = dds[i] - rpos_min - if rpos_min > hsml[p]: - Nex_min[i] = ((rpos_min-hsml[p])/dds[i]) + 1 - if rpos_max > hsml[p]: - Nex_max[i] = ((rpos_max-hsml[p])/dds[i]) + 1 - Nex *= (Nex_max[i] + Nex_min[i] + 1) - if Nex > 1: - # Ensure that min/max values for x,y,z indexes are obeyed - if (Nex_max[0] + Nex_min[0] + 1) > xex_range.shape[0]: - xex_range = np.empty(Nex_max[0] + Nex_min[0] + 1, 'uint64') - if (Nex_max[1] + Nex_min[1] + 1) > yex_range.shape[0]: - yex_range = np.empty(Nex_max[1] + Nex_min[1] + 1, 'uint64') - if (Nex_max[2] + Nex_min[2] + 1) > zex_range.shape[0]: - zex_range = np.empty(Nex_max[2] + Nex_min[2] + 1, 'uint64') - xex_min = mi_split[0] - min(Nex_min[0], mi_split[0]) - xex_max = mi_split[0] + min(Nex_max[0], (mi_max - mi_split[0])) + 1 - yex_min = mi_split[1] - min(Nex_min[1], mi_split[1]) - yex_max = mi_split[1] + min(Nex_max[1], (mi_max - mi_split[1])) + 1 - zex_min = mi_split[2] - min(Nex_min[2], mi_split[2]) - zex_max = 
mi_split[2] + min(Nex_max[2], (mi_max - mi_split[2])) + 1 - ixe = iye = ize = 0 - for xex in range(xex_min, xex_max): - xex_range[ixe] = xex - ixe += 1 - for yex in range(yex_min, yex_max): - yex_range[iye] = yex - iye += 1 - for zex in range(zex_min, zex_max): - zex_range[ize] = zex - ize += 1 - # Add periodic wrapping - if PER[0]: - if Nex_min[0] > mi_split[0]: - for xex in range(mi_max + 1 - (Nex_min[0] - mi_split[0]), mi_max + 1): - xex_range[ixe] = xex - ixe += 1 - if Nex_max[0] > (mi_max-mi_split[0]): - for xex in range(0, Nex_max[0] - (mi_max-mi_split[0])): - xex_range[ixe] = xex - ixe += 1 - if PER[1]: - if Nex_min[1] > mi_split[1]: - for yex in range(mi_max + 1 - (Nex_min[1] - mi_split[1]), mi_max + 1): - yex_range[iye] = yex - iye += 1 - if Nex_max[1] > (mi_max-mi_split[1]): - for yex in range(0, Nex_max[1] - (mi_max-mi_split[1])): - yex_range[iye] = yex - iye += 1 - if PER[2]: - if Nex_min[2] > mi_split[2]: - for zex in range(mi_max + 1 - (Nex_min[2] - mi_split[2]), mi_max + 1): - zex_range[ize] = zex - ize += 1 - if Nex_max[2] > (mi_max-mi_split[2]): - for zex in range(0, Nex_max[2] - (mi_max-mi_split[2])): - zex_range[ize] = zex - ize += 1 - for ix in range(ixe): - xex = xex_range[ix] - for iy in range(iye): - yex = yex_range[iy] - for iz in range(ize): - zex = zex_range[iz] - miex = encode_morton_64bit(xex, yex, zex) - if miex >= msize: - raise IndexError( - "Index for a softening region " + - "({}) exceeds ".format(miex) + - "max ({})".format(msize)) - mask[miex] = 1 + if hsml is None: + continue + if hsml[p] < 0: + raise RuntimeError( + "Smoothing length for particle %s is negative with " + "value \"%s\"" % p, hsml[p]) + radius = hsml[p] + # We first check if we're bounded within the domain; this follows the logic in the + # pixelize_cartesian routine. We assume that no smoothing + # length can wrap around both directions. 
+ for i in range(3): + if PER[i] and ppos[i] - radius < LE[i]: + axiter[i][1] = +1 + axiterv[i][1] = DW[i] + elif PER[i] and ppos[i] + radius > RE[i]: + axiter[i][1] = -1 + axiterv[i][1] = -DW[i] + for xi in range(2): + if axiter[0][xi] == 999: continue + s_ppos[0] = ppos[0] + axiterv[0][xi] + for yi in range(2): + if axiter[1][yi] == 999: continue + s_ppos[1] = ppos[1] + axiterv[1][yi] + for zi in range(2): + if axiter[2][zi] == 999: continue + s_ppos[2] = ppos[2] + axiterv[2][zi] + # OK, now we compute the left and right edges for this shift. + for i in range(3): + bounds[i][0] = i64max(((s_ppos[i] - LE[i] - radius)/dds[i]), 0) + bounds[i][1] = i64min(((s_ppos[i] - LE[i] + radius)/dds[i]), mi_max) + for xex in range(bounds[0][0], bounds[0][1]): + for yex in range(bounds[1][0], bounds[1][1]): + for zex in range(bounds[2][0], bounds[2][1]): + miex = encode_morton_64bit(xex, yex, zex) + mask[miex] = 1 + if miex >= msize: + raise IndexError( + "Index for a softening region " + + "({}) exceeds ".format(miex) + + "max ({})".format(msize)) @cython.boundscheck(False) @cython.wraparound(False) From 0b68faff7cc1463fdd7f7efe42bd5f76e1e2f8b9 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Thu, 7 May 2020 14:34:22 -0500 Subject: [PATCH 14/42] We need bounds[i][1] + 1 for inclusive loops --- yt/frontends/gadget/tests/test_outputs.py | 71 ++++++++++++++++++++++- yt/geometry/particle_oct_container.pyx | 5 +- yt/testing.py | 46 +++++++++++++++ 3 files changed, 120 insertions(+), 2 deletions(-) diff --git a/yt/frontends/gadget/tests/test_outputs.py b/yt/frontends/gadget/tests/test_outputs.py index b952bd2c4f9..d7fd3fbdfc4 100644 --- a/yt/frontends/gadget/tests/test_outputs.py +++ b/yt/frontends/gadget/tests/test_outputs.py @@ -5,7 +5,9 @@ import tempfile import yt -from yt.testing import requires_file +from yt.testing import requires_file, \ + ParticleSelectionComparison, \ + assert_equal from yt.utilities.answer_testing.framework import \ data_dir_load, \ requires_ds, \ @@ 
-107,6 +109,73 @@ def test_multifile_read(): assert isinstance(data_dir_load(snap_33), GadgetDataset) assert isinstance(data_dir_load(snap_33_dir), GadgetDataset) +@requires_file(snap_33) +def test_particle_subselection(): + #This checks that we correctly subselect from a dataset, first by making + #sure we get all the particles, then by comparing manual selections against + #them. + ds = data_dir_load(snap_33) + psc = ParticleSelectionComparison(ds) + + sp1 = ds.sphere("c", (0.1, "unitary")) + psc.compare_dobj_selection(sp1) + + sp2 = ds.sphere("c", (0.2, "unitary")) + psc.compare_dobj_selection(sp2) + + # Test wrapping around each axis individually: x + sp3_x = ds.sphere((1.0, 12.5, 12.5), (2.0, "code_length")) + psc.compare_dobj_selection(sp3_x) + + # Test wrapping around each axis individually: y + sp3_y = ds.sphere((12.5, 1.0, 12.5), (2.0, "code_length")) + psc.compare_dobj_selection(sp3_y) + + # Test wrapping around each axis individually: z + sp3_z = ds.sphere((12.5, 12.5, 1.0), (2.0, "code_length")) + psc.compare_dobj_selection(sp3_z) + + # Test wrapping around all three axes simultaneously on left + sp3_all = ds.sphere((1.0, 1.0, 1.0), (2.0, "code_length")) + psc.compare_dobj_selection(sp3_all) + + # Test wrapping around each axis individually on right: x + sp4_x = ds.sphere((24.0, 12.5, 12.5), (2.0, "code_length")) + psc.compare_dobj_selection(sp4_x) + + # Test wrapping around each axis individually on right: y + sp4_y = ds.sphere((12.5, 24.0, 12.5), (2.0, "code_length")) + psc.compare_dobj_selection(sp4_y) + + # Test wrapping around each axis individually on right: z + sp4_z = ds.sphere((12.5, 12.5, 24.0), (2.0, "code_length")) + psc.compare_dobj_selection(sp4_z) + + # Test wrapping around all three axes simultaneously on right + sp4_all = ds.sphere((24.0, 24.0, 24.0), (2.0, "code_length")) + psc.compare_dobj_selection(sp4_all) + + sp5 = ds.sphere("c", (0.5, "unitary")) + psc.compare_dobj_selection(sp5) + + dd = ds.all_data() + 
psc.compare_dobj_selection(dd) + + reg1 = ds.r[ (0.1, 'unitary'):(0.9, 'unitary'), + (0.1, 'unitary'):(0.9, 'unitary'), + (0.1, 'unitary'):(0.9, 'unitary')] + psc.compare_dobj_selection(reg1) + + reg2 = ds.r[ (0.8, 'unitary'):(0.85, 'unitary'), + (0.8, 'unitary'):(0.85, 'unitary'), + (0.8, 'unitary'):(0.85, 'unitary')] + psc.compare_dobj_selection(reg2) + + reg3 = ds.r[ (0.3, 'unitary'):(0.6, 'unitary'), + (0.2, 'unitary'):(0.8, 'unitary'), + (0.0, 'unitary'):(0.1, 'unitary')] + psc.compare_dobj_selection(reg3) + @requires_ds(BE_Gadget) def test_bigendian_field_access(): ds = data_dir_load(BE_Gadget) diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index 7b766121bfe..dc08ab70ed5 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -552,7 +552,10 @@ cdef class ParticleBitmap: # OK, now we compute the left and right edges for this shift. for i in range(3): bounds[i][0] = i64max(((s_ppos[i] - LE[i] - radius)/dds[i]), 0) - bounds[i][1] = i64min(((s_ppos[i] - LE[i] + radius)/dds[i]), mi_max) + bounds[i][1] = i64min(((s_ppos[i] - LE[i] + radius)/dds[i]), mi_max) + 1 + # We go to the upper bound plus one so that we have *inclusive* loops -- the upper bound + # is the cell *index*, so we want to make sure we include that cell. This is also why + # we don't need to worry about mi_max being the max index rather than the cell count. 
for xex in range(bounds[0][0], bounds[0][1]): for yex in range(bounds[1][0], bounds[1][1]): for zex in range(bounds[2][0], bounds[2][1]): diff --git a/yt/testing.py b/yt/testing.py index 94abdceda26..d2d9d1bc286 100644 --- a/yt/testing.py +++ b/yt/testing.py @@ -1223,3 +1223,49 @@ def setUp(self): def tearDown(self): os.chdir(self.curdir) shutil.rmtree(self.tmpdir) + +class ParticleSelectionComparison: + """ + This is a test helper class that takes a particle dataset, caches the + particles it has on disk (manually reading them using lower-level IO + routines) and then received a data object that it compares against manually + running the data object's selection routines. All supplied data objects + must be created from the input dataset. + """ + + def __init__(self, ds): + self.ds = ds + # Construct an index so that we get all the data_files + ds.index + particles = {} + # hsml is the smoothing length we use for radial selection + hsml = {} + for data_file in ds.index.data_files: + for ptype, pos_arr in ds.index.io._yield_coordinates(data_file): + particles.setdefault(ptype, []).append(pos_arr) + if ptype in getattr(ds, '_sph_ptypes', ()): + hsml.setdefault(ptype, []).append(ds.index.io._get_smoothing_length( + data_file, pos_arr.dtype, pos_arr.shape)) + for ptype in particles: + particles[ptype] = np.concatenate(particles[ptype]) + if ptype in hsml: + hsml[ptype] = np.concatenate(hsml[ptype]) + self.particles = particles + self.hsml = hsml + + def compare_dobj_selection(self, dobj): + for ptype in sorted(self.particles): + x, y, z = self.particles[ptype].T + # Set our radii to zero for now, I guess? 
+ radii = self.hsml.get(ptype, 0.0) + sel_index = dobj.selector.select_points(x, y, z, radii) + if sel_index is None: + sel_pos = np.empty((0, 3)) + else: + sel_pos = self.particles[ptype][sel_index, :] + + obj_results = [] + for chunk in dobj.chunks([], "io"): + obj_results.append(chunk[ptype, "particle_position"]) + obj_results = np.concatenate(obj_results, axis = 0) + assert_equal(sel_pos, obj_results) From b20c3edbbc5b057077d12dc456974f797ce726f6 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Fri, 8 May 2020 07:11:34 -0500 Subject: [PATCH 15/42] remove unused import --- yt/frontends/gadget/tests/test_outputs.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/yt/frontends/gadget/tests/test_outputs.py b/yt/frontends/gadget/tests/test_outputs.py index d7fd3fbdfc4..bdba0994e55 100644 --- a/yt/frontends/gadget/tests/test_outputs.py +++ b/yt/frontends/gadget/tests/test_outputs.py @@ -6,8 +6,7 @@ import yt from yt.testing import requires_file, \ - ParticleSelectionComparison, \ - assert_equal + ParticleSelectionComparison from yt.utilities.answer_testing.framework import \ data_dir_load, \ requires_ds, \ From 8cdf546033b93f1bd39111e56f3587d18a534ffd Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Fri, 8 May 2020 21:02:05 -0500 Subject: [PATCH 16/42] First, not-quite-working, pass at refined stuff. 
--- yt/geometry/particle_oct_container.pyx | 399 ++++++++++++++++--------- 1 file changed, 253 insertions(+), 146 deletions(-) diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index f8639c399db..9808c6fb3f8 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -30,12 +30,14 @@ from yt.funcs import get_pbar from particle_deposit cimport gind from yt.utilities.lib.ewah_bool_array cimport \ - ewah_bool_array, ewah_bool_iterator + ewah_bool_array, ewah_bool_iterator, ewah_map #from yt.utilities.lib.ewah_bool_wrap cimport \ from ..utilities.lib.ewah_bool_wrap cimport BoolArrayCollection +from libcpp cimport bool from libcpp.map cimport map from libcpp.vector cimport vector from libcpp.pair cimport pair +from libcpp.unordered_set cimport unordered_set as uset from cython.operator cimport dereference, preincrement import struct import os @@ -53,6 +55,8 @@ from ..utilities.lib.ewah_bool_wrap cimport SparseUnorderedRefinedBitmaskSet as from ..utilities.lib.ewah_bool_wrap cimport BoolArrayCollectionUncompressed as BoolArrayColl from ..utilities.lib.ewah_bool_wrap cimport FileBitmasks +ctypedef map[np.uint64_t, vector[bool]] CoarseRefinedSets + cdef class ParticleOctreeContainer(OctreeContainer): cdef Oct** oct_list #The starting oct index of each domain @@ -551,8 +555,9 @@ cdef class ParticleBitmap: s_ppos[2] = ppos[2] + axiterv[2][zi] # OK, now we compute the left and right edges for this shift. 
for i in range(3): - bounds[i][0] = i64max(((s_ppos[i] - LE[i] - radius)/dds[i]), 0) - bounds[i][1] = i64min(((s_ppos[i] - LE[i] + radius)/dds[i]), mi_max) + 1 + # Note that we cast here to int64_t because this could be negative + bounds[i][0] = i64max(((s_ppos[i] - LE[i] - radius)/dds[i]), 0) + bounds[i][1] = i64min(((s_ppos[i] - LE[i] + radius)/dds[i]), mi_max) + 1 # We go to the upper bound plus one so that we have *inclusive* loops -- the upper bound # is the cell *index*, so we want to make sure we include that cell. This is also why # we don't need to worry about mi_max being the max index rather than the cell count. @@ -617,12 +622,16 @@ cdef class ParticleBitmap: cdef np.int64_t i, p cdef np.uint64_t mi1, mi2 cdef np.float64_t ppos[3] + cdef np.float64_t s_ppos[3] # shifted ppos cdef int skip, Nex + cdef np.uint64_t bounds[3][2] cdef np.float64_t LE[3] cdef np.float64_t RE[3] + cdef np.float64_t DW[3] cdef np.uint8_t PER[3] cdef np.float64_t dds1[3] cdef np.float64_t dds2[3] + cdef np.float64_t radius cdef np.uint64_t mi_split1[3] cdef np.uint64_t mi_split2[3] cdef np.uint64_t miex1, miex2, mi1_max, mi2_max @@ -640,8 +649,12 @@ cdef class ParticleBitmap: cdef np.ndarray[np.uint64_t, ndim=1] yex2_range = np.empty(7, 'uint64') cdef np.ndarray[np.uint64_t, ndim=1] zex2_range = np.empty(7, 'uint64') cdef np.int64_t msize = sub_mi1.shape[0] + cdef int axiter[3][2] + cdef np.float64_t axiterv[3][2] + cdef CoarseRefinedSets coarse_refined_map mi1_max = (1 << self.index_order1) - 1 mi2_max = (1 << self.index_order2) - 1 + cdef np.uint64_t max_mi2_elements = 1 << (3*self.index_order2) # Copy things from structure (type cast) for i in range(3): LE[i] = self.left_edge[i] @@ -649,10 +662,14 @@ cdef class ParticleBitmap: PER[i] = self.periodicity[i] dds1[i] = self.dds_mi1[i] dds2[i] = self.dds_mi2[i] + DW[i] = RE[i] - LE[i] + axiter[i][0] = 0 # We always do an offset of 0 + axiterv[i][0] = 0.0 # Loop over positions skipping those outside the domain for p in 
range(pos.shape[0]): skip = 0 for i in range(3): + axiter[i][1] = 999 if pos[p,i] >= RE[i] or pos[p,i] < LE[i]: skip = 1 break @@ -661,162 +678,252 @@ cdef class ParticleBitmap: # Only look if collision at coarse index mi1 = bounded_morton_split_dds(ppos[0], ppos[1], ppos[2], LE, dds1, mi_split1) - if mask[mi1] > 1: + if hsml is None: + if mask[mi1] <= 1: # only one thing in this area + continue # Determine sub index within cell of primary index - if nsub_mi >= msize: - raise IndexError("Refined index exceeded estimate.") mi2 = bounded_morton_split_relative_dds( ppos[0], ppos[1], ppos[2], LE, dds1, dds2, mi_split2) + if coarse_refined_map.count(mi1) == 0: + coarse_refined_map[mi1] = vector[bool](max_mi2_elements, False) + coarse_refined_map[mi1][mi2] = True + else: # only hit if we have smoothing lengths. + # We have to do essentially the identical process to in the coarse indexing, + # except here we need to fill in all the subranges as well as the coarse ranges + # Note that we are also doing the null case, where we do no shifting + radius = hsml[p] + for i in range(3): + if PER[i] and ppos[i] - radius < LE[i]: + axiter[i][1] = +1 + axiterv[i][1] = DW[i] + elif PER[i] and ppos[i] + radius > RE[i]: + axiter[i][1] = -1 + axiterv[i][1] = -DW[i] + for xi in range(2): + if axiter[0][xi] == 999: continue + s_ppos[0] = ppos[0] + axiterv[0][xi] + for yi in range(2): + if axiter[1][yi] == 999: continue + s_ppos[1] = ppos[1] + axiterv[1][yi] + for zi in range(2): + if axiter[2][zi] == 999: continue + s_ppos[2] = ppos[2] + axiterv[2][zi] + # OK, now we compute the left and right edges for this shift. 
+ for i in range(3): + # casting to int64 is not nice but is so we can have negative values we clip + bounds[i][0] = i64max(((s_ppos[i] - LE[i] - radius)/dds1[i]), 0) + bounds[i][1] = i64min(((s_ppos[i] - LE[i] + radius)/dds1[i]), mi1_max) + 1 + # We go to the upper bound plus one so that we have *inclusive* loops -- the upper bound + # is the cell *index*, so we want to make sure we include that cell. This is also why + # we don't need to worry about mi_max being the max index rather than the cell count. + for xex in range(bounds[0][0], bounds[0][1]): + for yex in range(bounds[1][0], bounds[1][1]): + for zex in range(bounds[2][0], bounds[2][1]): + miex = encode_morton_64bit(xex, yex, zex) + if mask[miex] <= 1: + continue + # Now we need to fill our sub-range + if coarse_refined_map.count(miex) == 0: + coarse_refined_map[miex] = vector[bool](max_mi2_elements, False) + self.__fill_refined_ranges(s_ppos, radius, LE, RE, + dds1, xex, yex, zex, + dds2, mi1_max, mi2_max, miex, + coarse_refined_map[miex]) + print("THIS MANY COARSE CELLS", coarse_refined_map.size()) + cdef np.uint64_t count + for it1 in coarse_refined_map: + mi1 = it1.first + count = 0 + for it2 in it1.second: + if it2 == True: + count += 1 sub_mi1[nsub_mi] = mi1 - sub_mi2[nsub_mi] = mi2 - nsub_mi += 1 - # Expand for smoothing - if hsml is not None: - Nex = 1 - for i in range(3): - Nex_min[i] = 0 - Nex_max[i] = 0 - rpos_min = ppos[i] - (dds2[i]*mi_split2[i] + dds1[i]*mi_split1[i] + LE[i]) - rpos_max = dds2[i] - rpos_min - if rpos_min > hsml[p]: - Nex_min[i] = ((rpos_min-hsml[p])/dds2[i]) + 1 - if rpos_max > hsml[p]: - Nex_max[i] = ((rpos_max-hsml[p])/dds2[i]) + 1 - Nex *= (Nex_max[i] + Nex_min[i] + 1) - if Nex > 1: - # Ensure that min/max values for x,y,z indexes are obeyed - if (Nex_max[0] + Nex_min[0] + 1) > xex1_range.shape[0]: - xex1_range = np.empty(Nex_max[0] + Nex_min[0] + 1, 'uint64') - xex2_range = np.empty(Nex_max[0] + Nex_min[0] + 1, 'uint64') - if (Nex_max[1] + Nex_min[1] + 1) > 
yex1_range.shape[0]: - yex1_range = np.empty(Nex_max[1] + Nex_min[1] + 1, 'uint64') - yex2_range = np.empty(Nex_max[1] + Nex_min[1] + 1, 'uint64') - if (Nex_max[2] + Nex_min[2] + 1) > zex1_range.shape[0]: - zex1_range = np.empty(Nex_max[2] + Nex_min[2] + 1, 'uint64') - zex2_range = np.empty(Nex_max[2] + Nex_min[2] + 1, 'uint64') - xex2_min = mi_split2[0] - min(Nex_min[0], mi_split2[0]) - xex2_max = mi_split2[0] + min(Nex_max[0], (mi2_max - mi_split2[0])) + 1 - yex2_min = mi_split2[1] - min(Nex_min[1], mi_split2[1]) - yex2_max = mi_split2[1] + min(Nex_max[1], (mi2_max - mi_split2[1])) + 1 - zex2_min = mi_split2[2] - min(Nex_min[2], mi_split2[2]) - zex2_max = mi_split2[2] + min(Nex_max[2], (mi2_max - mi_split2[2])) + 1 - ixe = iye = ize = 0 - for xex2 in range(xex2_min, xex2_max): - xex1_range[ixe] = mi_split1[0] + sub_mi2[nsub_mi] = it2 + #nsub_mi += 1 + print("IN ", mi1, "THIS MANY REFINED CELLS", count) + return nsub_mi + + if 0: + # Expand for smoothing + Nex = 1 + for i in range(3): + Nex_min[i] = 0 + Nex_max[i] = 0 + rpos_min = ppos[i] - (dds2[i]*mi_split2[i] + dds1[i]*mi_split1[i] + LE[i]) + rpos_max = dds2[i] - rpos_min + if rpos_min > hsml[p]: + Nex_min[i] = ((rpos_min-hsml[p])/dds2[i]) + 1 + if rpos_max > hsml[p]: + Nex_max[i] = ((rpos_max-hsml[p])/dds2[i]) + 1 + Nex *= (Nex_max[i] + Nex_min[i] + 1) + if Nex > 1: + # Ensure that min/max values for x,y,z indexes are obeyed + if (Nex_max[0] + Nex_min[0] + 1) > xex1_range.shape[0]: + xex1_range = np.empty(Nex_max[0] + Nex_min[0] + 1, 'uint64') + xex2_range = np.empty(Nex_max[0] + Nex_min[0] + 1, 'uint64') + if (Nex_max[1] + Nex_min[1] + 1) > yex1_range.shape[0]: + yex1_range = np.empty(Nex_max[1] + Nex_min[1] + 1, 'uint64') + yex2_range = np.empty(Nex_max[1] + Nex_min[1] + 1, 'uint64') + if (Nex_max[2] + Nex_min[2] + 1) > zex1_range.shape[0]: + zex1_range = np.empty(Nex_max[2] + Nex_min[2] + 1, 'uint64') + zex2_range = np.empty(Nex_max[2] + Nex_min[2] + 1, 'uint64') + xex2_min = mi_split2[0] - min(Nex_min[0], 
mi_split2[0]) + xex2_max = mi_split2[0] + min(Nex_max[0], (mi2_max - mi_split2[0])) + 1 + yex2_min = mi_split2[1] - min(Nex_min[1], mi_split2[1]) + yex2_max = mi_split2[1] + min(Nex_max[1], (mi2_max - mi_split2[1])) + 1 + zex2_min = mi_split2[2] - min(Nex_min[2], mi_split2[2]) + zex2_max = mi_split2[2] + min(Nex_max[2], (mi2_max - mi_split2[2])) + 1 + ixe = iye = ize = 0 + for xex2 in range(xex2_min, xex2_max): + xex1_range[ixe] = mi_split1[0] + xex2_range[ixe] = xex2 + ixe += 1 + for yex2 in range(yex2_min, yex2_max): + yex1_range[iye] = mi_split1[1] + yex2_range[iye] = yex2 + iye += 1 + for zex2 in range(zex2_min, zex2_max): + zex1_range[ize] = mi_split1[2] + zex2_range[ize] = zex2 + ize += 1 + # Expand to adjacent coarse cells, wrapping periodically + # if need be + # x + if Nex_min[0] > mi_split2[0]: + if mi_split1[0] > 0: + for xex2 in range(mi2_max + 1 - (Nex_min[0] - mi_split2[0]), mi2_max + 1): + xex1_range[ixe] = mi_split1[0] - 1 + xex2_range[ixe] = xex2 + ixe += 1 + elif PER[0]: + for xex2 in range(mi2_max + 1 - (Nex_min[0] - mi_split2[0]), mi2_max + 1): + xex1_range[ixe] = mi1_max + xex2_range[ixe] = xex2 + ixe += 1 + if Nex_max[0] > (mi2_max-mi_split2[0]): + if mi_split1[0] < mi1_max: + for xex2 in range(0, Nex_max[0] - (mi2_max-mi_split2[0])): + xex1_range[ixe] = mi_split1[0] + 1 + xex2_range[ixe] = xex2 + ixe += 1 + elif PER[0]: + for xex2 in range(0, Nex_max[0] - (mi2_max-mi_split2[0])): + xex1_range[ixe] = 0 xex2_range[ixe] = xex2 ixe += 1 - for yex2 in range(yex2_min, yex2_max): - yex1_range[iye] = mi_split1[1] + # y + if Nex_min[1] > mi_split2[1]: + if mi_split1[1] > 0: + for yex2 in range(mi2_max + 1 - (Nex_min[1] - mi_split2[1]), mi2_max + 1): + yex1_range[iye] = mi_split1[1] - 1 + yex2_range[iye] = yex2 + iye += 1 + elif PER[1]: + for yex2 in range(mi2_max + 1 - (Nex_min[1] - mi_split2[1]), mi2_max + 1): + yex1_range[iye] = mi1_max yex2_range[iye] = yex2 iye += 1 - for zex2 in range(zex2_min, zex2_max): - zex1_range[ize] = mi_split1[2] + if 
Nex_max[1] > (mi2_max-mi_split2[1]): + if mi_split1[1] < mi1_max: + for yex2 in range(0, Nex_max[1] - (mi2_max-mi_split2[1])): + yex1_range[iye] = mi_split1[1] + 1 + yex2_range[iye] = yex2 + iye += 1 + elif PER[1]: + for yex2 in range(0, Nex_max[1] - (mi2_max-mi_split2[1])): + yex1_range[iye] = 0 + yex2_range[iye] = yex2 + iye += 1 + # z + if Nex_min[2] > mi_split2[2]: + if mi_split1[2] > 0: + for zex2 in range(mi2_max + 1 - (Nex_min[2] - mi_split2[2]), mi2_max + 1): + zex1_range[ize] = mi_split1[2] - 1 zex2_range[ize] = zex2 ize += 1 - # Expand to adjacent coarse cells, wrapping periodically - # if need be - # x - if Nex_min[0] > mi_split2[0]: - if mi_split1[0] > 0: - for xex2 in range(mi2_max + 1 - (Nex_min[0] - mi_split2[0]), mi2_max + 1): - xex1_range[ixe] = mi_split1[0] - 1 - xex2_range[ixe] = xex2 - ixe += 1 - elif PER[0]: - for xex2 in range(mi2_max + 1 - (Nex_min[0] - mi_split2[0]), mi2_max + 1): - xex1_range[ixe] = mi1_max - xex2_range[ixe] = xex2 - ixe += 1 - if Nex_max[0] > (mi2_max-mi_split2[0]): - if mi_split1[0] < mi1_max: - for xex2 in range(0, Nex_max[0] - (mi2_max-mi_split2[0])): - xex1_range[ixe] = mi_split1[0] + 1 - xex2_range[ixe] = xex2 - ixe += 1 - elif PER[0]: - for xex2 in range(0, Nex_max[0] - (mi2_max-mi_split2[0])): - xex1_range[ixe] = 0 - xex2_range[ixe] = xex2 - ixe += 1 - # y - if Nex_min[1] > mi_split2[1]: - if mi_split1[1] > 0: - for yex2 in range(mi2_max + 1 - (Nex_min[1] - mi_split2[1]), mi2_max + 1): - yex1_range[iye] = mi_split1[1] - 1 - yex2_range[iye] = yex2 - iye += 1 - elif PER[1]: - for yex2 in range(mi2_max + 1 - (Nex_min[1] - mi_split2[1]), mi2_max + 1): - yex1_range[iye] = mi1_max - yex2_range[iye] = yex2 - iye += 1 - if Nex_max[1] > (mi2_max-mi_split2[1]): - if mi_split1[1] < mi1_max: - for yex2 in range(0, Nex_max[1] - (mi2_max-mi_split2[1])): - yex1_range[iye] = mi_split1[1] + 1 - yex2_range[iye] = yex2 - iye += 1 - elif PER[1]: - for yex2 in range(0, Nex_max[1] - (mi2_max-mi_split2[1])): - yex1_range[iye] = 0 - 
yex2_range[iye] = yex2 - iye += 1 - # z - if Nex_min[2] > mi_split2[2]: - if mi_split1[2] > 0: - for zex2 in range(mi2_max + 1 - (Nex_min[2] - mi_split2[2]), mi2_max + 1): - zex1_range[ize] = mi_split1[2] - 1 - zex2_range[ize] = zex2 - ize += 1 - elif PER[2]: - for zex2 in range(mi2_max + 1 - (Nex_min[2] - mi_split2[2]), mi2_max + 1): - zex1_range[ize] = mi1_max - zex2_range[ize] = zex2 - ize += 1 - if Nex_max[2] > (mi2_max-mi_split2[2]): - if mi_split1[2] < mi1_max: - for zex2 in range(0, Nex_max[2] - (mi2_max-mi_split2[2])): - zex1_range[ize] = mi_split1[2] + 1 - zex2_range[ize] = zex2 - ize += 1 - elif PER[2]: - for zex2 in range(0, Nex_max[2] - (mi2_max-mi_split2[2])): - zex1_range[ize] = 0 - zex2_range[ize] = zex2 - ize += 1 - for ix in range(ixe): - xex1 = xex1_range[ix] - xex2 = xex2_range[ix] - for iy in range(iye): - yex1 = yex1_range[iy] - yex2 = yex2_range[iy] - for iz in range(ize): - zex1 = zex1_range[iz] - zex2 = zex2_range[iz] - if (xex1 == mi_split1[0] and xex2 == mi_split2[0] and - yex1 == mi_split1[1] and yex2 == mi_split2[1] and - zex1 == mi_split1[2] and zex2 == mi_split2[2]): - continue - miex1 = encode_morton_64bit(xex1, yex1, zex1) - miex2 = encode_morton_64bit(xex2, yex2, zex2) - if nsub_mi >= msize: - # Uncomment these lines to allow periodic - # caching of refined indices - # self.bitmasks._set_refined_index_array( - # file_id, nsub_mi, sub_mi1, sub_mi2) - # nsub_mi = 0 - raise IndexError( - "Refined index exceeded original " - "estimate.\n" - "nsub_mi = %s, " - "sub_mi1.shape[0] = %s" - % (nsub_mi, sub_mi1.shape[0])) - sub_mi1[nsub_mi] = miex1 - sub_mi2[nsub_mi] = miex2 - nsub_mi += 1 + elif PER[2]: + for zex2 in range(mi2_max + 1 - (Nex_min[2] - mi_split2[2]), mi2_max + 1): + zex1_range[ize] = mi1_max + zex2_range[ize] = zex2 + ize += 1 + if Nex_max[2] > (mi2_max-mi_split2[2]): + if mi_split1[2] < mi1_max: + for zex2 in range(0, Nex_max[2] - (mi2_max-mi_split2[2])): + zex1_range[ize] = mi_split1[2] + 1 + zex2_range[ize] = zex2 + ize += 1 
+ elif PER[2]: + for zex2 in range(0, Nex_max[2] - (mi2_max-mi_split2[2])): + zex1_range[ize] = 0 + zex2_range[ize] = zex2 + ize += 1 + for ix in range(ixe): + xex1 = xex1_range[ix] + xex2 = xex2_range[ix] + for iy in range(iye): + yex1 = yex1_range[iy] + yex2 = yex2_range[iy] + for iz in range(ize): + zex1 = zex1_range[iz] + zex2 = zex2_range[iz] + if (xex1 == mi_split1[0] and xex2 == mi_split2[0] and + yex1 == mi_split1[1] and yex2 == mi_split2[1] and + zex1 == mi_split1[2] and zex2 == mi_split2[2]): + continue + miex1 = encode_morton_64bit(xex1, yex1, zex1) + miex2 = encode_morton_64bit(xex2, yex2, zex2) + if nsub_mi >= msize: + # Uncomment these lines to allow periodic + # caching of refined indices + # self.bitmasks._set_refined_index_array( + # file_id, nsub_mi, sub_mi1, sub_mi2) + # nsub_mi = 0 + raise IndexError( + "Refined index exceeded original " + "estimate.\n" + "nsub_mi = %s, " + "sub_mi1.shape[0] = %s" + % (nsub_mi, sub_mi1.shape[0])) + sub_mi1[nsub_mi] = miex1 + sub_mi2[nsub_mi] = miex2 + nsub_mi += 1 # Only subs of particles in the mask return nsub_mi + cdef np.uint64_t __fill_refined_ranges(self, np.float64_t s_ppos[3], np.float64_t radius, + np.float64_t LE[3], np.float64_t RE[3], + np.float64_t dds1[3], np.uint64_t xex, np.uint64_t yex, np.uint64_t zex, + np.float64_t dds2[3], + np.uint64_t mi1_max, np.uint64_t mi2_max, np.uint64_t miex1, + vector[bool] &refined_set) except *: + cdef int i + cdef np.uint64_t new_nsub = 0 + cdef np.uint64_t bounds_l[3], bounds_r[3] + cdef np.uint64_t miex2, mi2 + cdef np.float64_t clip_pos_l[3], clip_pos_r[3], cell_edge_l, cell_edge_r + cdef np.uint64_t ex1[3] + ex1[0] = xex; ex1[1] = yex; ex1[2] = zex + for i in range(3): + # Figure out our bounds inside our cell + cell_edge_l = ex1[i] * dds1[i] + LE[i] + cell_edge_r = (ex1[i] + 1) * dds1[i] + LE[i] + clip_pos_l[i] = fmax(s_ppos[i] - radius, cell_edge_l + dds2[i]/2.0) + clip_pos_r[i] = fmin(s_ppos[i] + radius, cell_edge_r - dds2[i]/2.0) + mi2 = 
bounded_morton_split_relative_dds(clip_pos_l[0], clip_pos_l[1], clip_pos_l[2], + LE, dds1, dds2, bounds_l) + mi2 = bounded_morton_split_relative_dds(clip_pos_r[0], clip_pos_r[1], clip_pos_r[2], + LE, dds1, dds2, bounds_r) + if bounds_r[0] < bounds_l[0] or bounds_r[1] < bounds_l[1] or bounds_r[2] < bounds_l[2]: + print(bounds_r[0] - bounds_l[0], bounds_r[1] - bounds_l[1], bounds_r[2] - bounds_l[2]) + raise RuntimeError + for xex2 in range(bounds_l[0], bounds_r[0] + 1): + for yex2 in range(bounds_l[1], bounds_r[1] + 1): + for zex2 in range(bounds_l[2], bounds_r[2] + 1): + miex2 = encode_morton_64bit(xex2, yex2, zex2) + refined_set[miex2] = True + new_nsub += 1 + return new_nsub + @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) From af135de3df1bae34b7b0d43e801862fdf3627d87 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Wed, 13 May 2020 15:09:48 -0500 Subject: [PATCH 17/42] Temporary commit, still not working --- yt/geometry/particle_oct_container.pyx | 39 ++++++++++++++++++-------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index 9808c6fb3f8..9962ba844c7 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -684,8 +684,8 @@ cdef class ParticleBitmap: # Determine sub index within cell of primary index mi2 = bounded_morton_split_relative_dds( ppos[0], ppos[1], ppos[2], LE, dds1, dds2, mi_split2) - if coarse_refined_map.count(mi1) == 0: - coarse_refined_map[mi1] = vector[bool](max_mi2_elements, False) + if coarse_refined_map[mi1].size() == 0: + coarse_refined_map[mi1].resize(max_mi2_elements, False) coarse_refined_map[mi1][mi2] = True else: # only hit if we have smoothing lengths. 
# We have to do essentially the identical process to in the coarse indexing, @@ -723,12 +723,13 @@ cdef class ParticleBitmap: if mask[miex] <= 1: continue # Now we need to fill our sub-range - if coarse_refined_map.count(miex) == 0: - coarse_refined_map[miex] = vector[bool](max_mi2_elements, False) + if coarse_refined_map[miex].size() == 0: + coarse_refined_map[miex].resize(max_mi2_elements, False) self.__fill_refined_ranges(s_ppos, radius, LE, RE, dds1, xex, yex, zex, dds2, mi1_max, mi2_max, miex, - coarse_refined_map[miex]) + coarse_refined_map[miex], ppos, mask[miex], + max_mi2_elements) print("THIS MANY COARSE CELLS", coarse_refined_map.size()) cdef np.uint64_t count for it1 in coarse_refined_map: @@ -740,7 +741,7 @@ cdef class ParticleBitmap: sub_mi1[nsub_mi] = mi1 sub_mi2[nsub_mi] = it2 #nsub_mi += 1 - print("IN ", mi1, "THIS MANY REFINED CELLS", count) + #print("IN ", mi1, "THIS MANY REFINED CELLS", count) return nsub_mi if 0: @@ -890,12 +891,17 @@ cdef class ParticleBitmap: # Only subs of particles in the mask return nsub_mi - cdef np.uint64_t __fill_refined_ranges(self, np.float64_t s_ppos[3], np.float64_t radius, + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + @cython.initializedcheck(False) + cdef np.int64_t __fill_refined_ranges(self, np.float64_t s_ppos[3], np.float64_t radius, np.float64_t LE[3], np.float64_t RE[3], np.float64_t dds1[3], np.uint64_t xex, np.uint64_t yex, np.uint64_t zex, np.float64_t dds2[3], np.uint64_t mi1_max, np.uint64_t mi2_max, np.uint64_t miex1, - vector[bool] &refined_set) except *: + vector[bool] &refined_set, np.float64_t ppos[3], np.uint64_t mcount, + np.uint64_t max_mi2_elements) except -1: cdef int i cdef np.uint64_t new_nsub = 0 cdef np.uint64_t bounds_l[3], bounds_r[3] @@ -903,19 +909,30 @@ cdef class ParticleBitmap: cdef np.float64_t clip_pos_l[3], clip_pos_r[3], cell_edge_l, cell_edge_r cdef np.uint64_t ex1[3] ex1[0] = xex; ex1[1] = yex; ex1[2] = zex + # Check a few special cases 
for i in range(3): - # Figure out our bounds inside our cell + # Figure out our bounds inside our coarse cell, in the space of the + # full domain cell_edge_l = ex1[i] * dds1[i] + LE[i] - cell_edge_r = (ex1[i] + 1) * dds1[i] + LE[i] + cell_edge_r = cell_edge_l + dds1[i] clip_pos_l[i] = fmax(s_ppos[i] - radius, cell_edge_l + dds2[i]/2.0) clip_pos_r[i] = fmin(s_ppos[i] + radius, cell_edge_r - dds2[i]/2.0) mi2 = bounded_morton_split_relative_dds(clip_pos_l[0], clip_pos_l[1], clip_pos_l[2], LE, dds1, dds2, bounds_l) mi2 = bounded_morton_split_relative_dds(clip_pos_r[0], clip_pos_r[1], clip_pos_r[2], LE, dds1, dds2, bounds_r) + if bounds_l[0] == bounds_r[0] and bounds_l[1] == bounds_r[1] and bounds_l[2] == bounds_r[2]: + miex2 = encode_morton_64bit(bounds_l[0], bounds_l[1], bounds_l[2]) + refined_set[miex2] = True + return 1 if bounds_r[0] < bounds_l[0] or bounds_r[1] < bounds_l[1] or bounds_r[2] < bounds_l[2]: print(bounds_r[0] - bounds_l[0], bounds_r[1] - bounds_l[1], bounds_r[2] - bounds_l[2]) - raise RuntimeError + return -1 + if (bounds_l[0] == bounds_l[1] == bounds_l[2] == 0) and \ + (bounds_r[0] == bounds_r[1] == bounds_r[2] == mi2_max): + for miex2 in range(max_mi2_elements): + refined_set[miex2] = True + return max_mi2_elements for xex2 in range(bounds_l[0], bounds_r[0] + 1): for yex2 in range(bounds_l[1], bounds_r[1] + 1): for zex2 in range(bounds_l[2], bounds_r[2] + 1): From e626e259890309eedaae44131e6543e48817e5d0 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Fri, 15 May 2020 13:49:26 -0500 Subject: [PATCH 18/42] another pass --- yt/geometry/particle_oct_container.pyx | 65 +++++++++++++------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index 9962ba844c7..bb981b342b7 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -624,7 +624,7 @@ cdef class ParticleBitmap: cdef np.float64_t ppos[3] cdef np.float64_t 
s_ppos[3] # shifted ppos cdef int skip, Nex - cdef np.uint64_t bounds[3][2] + cdef np.uint64_t bounds[2][3] cdef np.float64_t LE[3] cdef np.float64_t RE[3] cdef np.float64_t DW[3] @@ -648,10 +648,12 @@ cdef class ParticleBitmap: cdef np.ndarray[np.uint64_t, ndim=1] xex2_range = np.empty(7, 'uint64') cdef np.ndarray[np.uint64_t, ndim=1] yex2_range = np.empty(7, 'uint64') cdef np.ndarray[np.uint64_t, ndim=1] zex2_range = np.empty(7, 'uint64') + cdef np.float64_t clip_pos_l[3], clip_pos_r[3] cdef np.int64_t msize = sub_mi1.shape[0] cdef int axiter[3][2] cdef np.float64_t axiterv[3][2] cdef CoarseRefinedSets coarse_refined_map + cdef np.uint64_t nset = 0 mi1_max = (1 << self.index_order1) - 1 mi2_max = (1 << self.index_order2) - 1 cdef np.uint64_t max_mi2_elements = 1 << (3*self.index_order2) @@ -692,6 +694,8 @@ cdef class ParticleBitmap: # except here we need to fill in all the subranges as well as the coarse ranges # Note that we are also doing the null case, where we do no shifting radius = hsml[p] + if mask[mi1] <= 1: # only one thing in this area + continue for i in range(3): if PER[i] and ppos[i] - radius < LE[i]: axiter[i][1] = +1 @@ -711,37 +715,43 @@ cdef class ParticleBitmap: # OK, now we compute the left and right edges for this shift. for i in range(3): # casting to int64 is not nice but is so we can have negative values we clip - bounds[i][0] = i64max(((s_ppos[i] - LE[i] - radius)/dds1[i]), 0) - bounds[i][1] = i64min(((s_ppos[i] - LE[i] + radius)/dds1[i]), mi1_max) + 1 + clip_pos_l[i] = fmax(s_ppos[i] - radius, LE[i] + dds1[i]/2) + clip_pos_r[i] = fmin(s_ppos[i] + radius, RE[i] - dds1[i]/2) + bounded_morton_split_dds(clip_pos_l[0], clip_pos_l[1], clip_pos_l[2], LE, dds1, bounds[0]) + bounded_morton_split_dds(clip_pos_r[0], clip_pos_r[1], clip_pos_r[2], LE, dds1, bounds[1]) # We go to the upper bound plus one so that we have *inclusive* loops -- the upper bound # is the cell *index*, so we want to make sure we include that cell. 
This is also why # we don't need to worry about mi_max being the max index rather than the cell count. - for xex in range(bounds[0][0], bounds[0][1]): - for yex in range(bounds[1][0], bounds[1][1]): - for zex in range(bounds[2][0], bounds[2][1]): + for xex in range(bounds[0][0], bounds[1][0] + 1): + for yex in range(bounds[0][1], bounds[1][1] + 1): + for zex in range(bounds[0][2], bounds[1][2] + 1): miex = encode_morton_64bit(xex, yex, zex) if mask[miex] <= 1: continue # Now we need to fill our sub-range if coarse_refined_map[miex].size() == 0: coarse_refined_map[miex].resize(max_mi2_elements, False) - self.__fill_refined_ranges(s_ppos, radius, LE, RE, + nset += self.__fill_refined_ranges(s_ppos, radius, LE, RE, dds1, xex, yex, zex, dds2, mi1_max, mi2_max, miex, coarse_refined_map[miex], ppos, mask[miex], max_mi2_elements) print("THIS MANY COARSE CELLS", coarse_refined_map.size()) - cdef np.uint64_t count + cdef np.uint64_t count, vec_i + cdef total_count = 0 for it1 in coarse_refined_map: mi1 = it1.first count = 0 + vec_i = 0 for it2 in it1.second: if it2 == True: count += 1 - sub_mi1[nsub_mi] = mi1 - sub_mi2[nsub_mi] = it2 - #nsub_mi += 1 + #sub_mi1[nsub_mi] = mi1 + #sub_mi2[nsub_mi] = vec_i + nsub_mi += 1 + vec_i += 1 #print("IN ", mi1, "THIS MANY REFINED CELLS", count) + total_count += count return nsub_mi if 0: @@ -905,9 +915,9 @@ cdef class ParticleBitmap: cdef int i cdef np.uint64_t new_nsub = 0 cdef np.uint64_t bounds_l[3], bounds_r[3] - cdef np.uint64_t miex2, mi2 + cdef np.uint64_t miex2, mi2, miex2_min, miex2_max cdef np.float64_t clip_pos_l[3], clip_pos_r[3], cell_edge_l, cell_edge_r - cdef np.uint64_t ex1[3] + cdef np.uint64_t ex1[3], ex2[3] ex1[0] = xex; ex1[1] = yex; ex1[2] = zex # Check a few special cases for i in range(3): @@ -917,28 +927,19 @@ cdef class ParticleBitmap: cell_edge_r = cell_edge_l + dds1[i] clip_pos_l[i] = fmax(s_ppos[i] - radius, cell_edge_l + dds2[i]/2.0) clip_pos_r[i] = fmin(s_ppos[i] + radius, cell_edge_r - dds2[i]/2.0) - mi2 = 
bounded_morton_split_relative_dds(clip_pos_l[0], clip_pos_l[1], clip_pos_l[2], + miex2_min = bounded_morton_split_relative_dds(clip_pos_l[0], clip_pos_l[1], clip_pos_l[2], LE, dds1, dds2, bounds_l) - mi2 = bounded_morton_split_relative_dds(clip_pos_r[0], clip_pos_r[1], clip_pos_r[2], + miex2_max = bounded_morton_split_relative_dds(clip_pos_r[0], clip_pos_r[1], clip_pos_r[2], LE, dds1, dds2, bounds_r) - if bounds_l[0] == bounds_r[0] and bounds_l[1] == bounds_r[1] and bounds_l[2] == bounds_r[2]: - miex2 = encode_morton_64bit(bounds_l[0], bounds_l[1], bounds_l[2]) + for miex2 in range(miex2_min, miex2_max + 1): + #miex2 = encode_morton_64bit(xex2, yex2, zex2) + decode_morton_64bit(miex2, ex2) + if ex2[0] < bounds_l[0] or ex2[0] > bounds_r[0] or \ + ex2[1] < bounds_l[1] or ex2[1] > bounds_r[1] or \ + ex2[2] < bounds_l[2] or ex2[2] > bounds_r[2]: + continue refined_set[miex2] = True - return 1 - if bounds_r[0] < bounds_l[0] or bounds_r[1] < bounds_l[1] or bounds_r[2] < bounds_l[2]: - print(bounds_r[0] - bounds_l[0], bounds_r[1] - bounds_l[1], bounds_r[2] - bounds_l[2]) - return -1 - if (bounds_l[0] == bounds_l[1] == bounds_l[2] == 0) and \ - (bounds_r[0] == bounds_r[1] == bounds_r[2] == mi2_max): - for miex2 in range(max_mi2_elements): - refined_set[miex2] = True - return max_mi2_elements - for xex2 in range(bounds_l[0], bounds_r[0] + 1): - for yex2 in range(bounds_l[1], bounds_r[1] + 1): - for zex2 in range(bounds_l[2], bounds_r[2] + 1): - miex2 = encode_morton_64bit(xex2, yex2, zex2) - refined_set[miex2] = True - new_nsub += 1 + new_nsub += 1 return new_nsub @cython.boundscheck(False) From b1c0d74e382893f61c7700a22fcda4427d8fe8a6 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Fri, 15 May 2020 15:03:01 -0500 Subject: [PATCH 19/42] try to short circuit, and fix cython bugs --- yt/geometry/particle_oct_container.pyx | 33 ++++++++++++++++++-------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/yt/geometry/particle_oct_container.pyx 
b/yt/geometry/particle_oct_container.pyx index bb981b342b7..9e1bcaf4728 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -639,6 +639,7 @@ cdef class ParticleBitmap: cdef int Nex_max[3] cdef np.float64_t rpos_min, rpos_max cdef np.uint64_t xex2_min, xex2_max, yex2_min, yex2_max, zex2_min, zex2_max + cdef np.uint64_t xex, yex, zex cdef np.uint64_t xex1, yex1, zex1 cdef np.uint64_t xex2, yex2, zex2 cdef int ix, iy, iz, ixe, iye, ize @@ -653,6 +654,7 @@ cdef class ParticleBitmap: cdef int axiter[3][2] cdef np.float64_t axiterv[3][2] cdef CoarseRefinedSets coarse_refined_map + cdef map[np.uint64_t, np.uint64_t] refined_count cdef np.uint64_t nset = 0 mi1_max = (1 << self.index_order1) - 1 mi2_max = (1 << self.index_order2) - 1 @@ -688,7 +690,10 @@ cdef class ParticleBitmap: ppos[0], ppos[1], ppos[2], LE, dds1, dds2, mi_split2) if coarse_refined_map[mi1].size() == 0: coarse_refined_map[mi1].resize(max_mi2_elements, False) - coarse_refined_map[mi1][mi2] = True + refined_count[mi1] = 0 + if coarse_refined_map[mi1][mi2] == False: + coarse_refined_map[mi1][mi2] = True + refined_count[mi1] += 1 else: # only hit if we have smoothing lengths. 
# We have to do essentially the identical process to in the coarse indexing, # except here we need to fill in all the subranges as well as the coarse ranges @@ -725,18 +730,22 @@ cdef class ParticleBitmap: for xex in range(bounds[0][0], bounds[1][0] + 1): for yex in range(bounds[0][1], bounds[1][1] + 1): for zex in range(bounds[0][2], bounds[1][2] + 1): - miex = encode_morton_64bit(xex, yex, zex) - if mask[miex] <= 1: + miex1 = encode_morton_64bit(xex, yex, zex) + if mask[miex1] <= 1: continue # Now we need to fill our sub-range - if coarse_refined_map[miex].size() == 0: - coarse_refined_map[miex].resize(max_mi2_elements, False) - nset += self.__fill_refined_ranges(s_ppos, radius, LE, RE, + if coarse_refined_map[miex1].size() == 0: + coarse_refined_map[miex1].resize(max_mi2_elements, False) + refined_count[miex1] = 0 + if refined_count[miex1] >= max_mi2_elements: + continue + refined_count[miex1] += self.__fill_refined_ranges(s_ppos, radius, LE, RE, dds1, xex, yex, zex, - dds2, mi1_max, mi2_max, miex, - coarse_refined_map[miex], ppos, mask[miex], + dds2, mi1_max, mi2_max, miex1, + coarse_refined_map[miex1], ppos, mask[miex1], max_mi2_elements) print("THIS MANY COARSE CELLS", coarse_refined_map.size()) + print("THIS MANY NSET", nset, nset / pos.shape[0], nsub_mi) cdef np.uint64_t count, vec_i cdef total_count = 0 for it1 in coarse_refined_map: @@ -750,8 +759,11 @@ cdef class ParticleBitmap: #sub_mi2[nsub_mi] = vec_i nsub_mi += 1 vec_i += 1 + if count != refined_count[mi1]: + print("WHY IS THIS WRONG", count, refined_count[mi1]) #print("IN ", mi1, "THIS MANY REFINED CELLS", count) total_count += count + print("NSUB_MI NOW", total_count, total_count / (coarse_refined_map.size() * max_mi2_elements), nsub_mi, sub_mi1.shape[0], sub_mi2.shape[0]) return nsub_mi if 0: @@ -938,8 +950,9 @@ cdef class ParticleBitmap: ex2[1] < bounds_l[1] or ex2[1] > bounds_r[1] or \ ex2[2] < bounds_l[2] or ex2[2] > bounds_r[2]: continue - refined_set[miex2] = True - new_nsub += 1 + if 
refined_set[miex2] == False: + refined_set[miex2] = True + new_nsub += 1 return new_nsub @cython.boundscheck(False) From bc7121caef2c94a276dc580b74488f837b278c0a Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Tue, 19 May 2020 15:17:27 -0500 Subject: [PATCH 20/42] Use expanded morton for faster BIGMAX selection --- yt/geometry/particle_oct_container.pyx | 35 +++++++++++++++----------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index 9e1bcaf4728..b8409d64b0d 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -747,23 +747,23 @@ cdef class ParticleBitmap: print("THIS MANY COARSE CELLS", coarse_refined_map.size()) print("THIS MANY NSET", nset, nset / pos.shape[0], nsub_mi) cdef np.uint64_t count, vec_i - cdef total_count = 0 + cdef np.uint64_t total_count = 0 for it1 in coarse_refined_map: mi1 = it1.first count = 0 vec_i = 0 - for it2 in it1.second: - if it2 == True: + for vec_i in range(it1.second.size()): + if it1.second[vec_i] == True: count += 1 #sub_mi1[nsub_mi] = mi1 #sub_mi2[nsub_mi] = vec_i nsub_mi += 1 - vec_i += 1 if count != refined_count[mi1]: print("WHY IS THIS WRONG", count, refined_count[mi1]) #print("IN ", mi1, "THIS MANY REFINED CELLS", count) total_count += count - print("NSUB_MI NOW", total_count, total_count / (coarse_refined_map.size() * max_mi2_elements), nsub_mi, sub_mi1.shape[0], sub_mi2.shape[0]) + if coarse_refined_map.size() > 0: + print("NSUB_MI NOW", total_count, total_count / (coarse_refined_map.size() * max_mi2_elements), nsub_mi, sub_mi1.shape[0], sub_mi2.shape[0]) return nsub_mi if 0: @@ -929,7 +929,8 @@ cdef class ParticleBitmap: cdef np.uint64_t bounds_l[3], bounds_r[3] cdef np.uint64_t miex2, mi2, miex2_min, miex2_max cdef np.float64_t clip_pos_l[3], clip_pos_r[3], cell_edge_l, cell_edge_r - cdef np.uint64_t ex1[3], ex2[3] + cdef np.uint64_t ex1[3], ex2[3], ex3[3] + cdef np.uint64_t 
xex_max, yex_max, zex_max ex1[0] = xex; ex1[1] = yex; ex1[2] = zex # Check a few special cases for i in range(3): @@ -943,16 +944,22 @@ cdef class ParticleBitmap: LE, dds1, dds2, bounds_l) miex2_max = bounded_morton_split_relative_dds(clip_pos_r[0], clip_pos_r[1], clip_pos_r[2], LE, dds1, dds2, bounds_r) + xex_max = encode_morton_64bit(mi2_max, 0, 0) + yex_max = encode_morton_64bit(0, mi2_max, 0) + zex_max = encode_morton_64bit(0, 0, mi2_max) for miex2 in range(miex2_min, miex2_max + 1): #miex2 = encode_morton_64bit(xex2, yex2, zex2) - decode_morton_64bit(miex2, ex2) - if ex2[0] < bounds_l[0] or ex2[0] > bounds_r[0] or \ - ex2[1] < bounds_l[1] or ex2[1] > bounds_r[1] or \ - ex2[2] < bounds_l[2] or ex2[2] > bounds_r[2]: - continue - if refined_set[miex2] == False: - refined_set[miex2] = True - new_nsub += 1 + #decode_morton_64bit(miex2, ex2) + # Let's check all our cases here + if refined_set[miex2] == True: continue + if (miex2 & xex_max) < (miex2_min & xex_max): continue + if (miex2 & yex_max) < (miex2_min & yex_max): continue + if (miex2 & zex_max) < (miex2_min & zex_max): continue + if (miex2 & xex_max) > (miex2_max & xex_max): continue + if (miex2 & yex_max) > (miex2_max & yex_max): continue + if (miex2 & zex_max) > (miex2_max & zex_max): continue + refined_set[miex2] = True + new_nsub += 1 return new_nsub @cython.boundscheck(False) From 300fc4afd49379d2e1333d7381bbbcb4e103d48d Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Tue, 19 May 2020 15:12:53 -0500 Subject: [PATCH 21/42] Give up on hiding the C++ in EWAH --- yt/geometry/selection_routines.pxd | 2 - yt/geometry/selection_routines.pyx | 3 - yt/utilities/lib/ewah_bool_wrap.pxd | 37 +-- yt/utilities/lib/ewah_bool_wrap.pyx | 334 ++++++++-------------------- 4 files changed, 119 insertions(+), 257 deletions(-) diff --git a/yt/geometry/selection_routines.pxd b/yt/geometry/selection_routines.pxd index 81d02dbb6cd..8ad6c687d63 100644 --- a/yt/geometry/selection_routines.pxd +++ 
b/yt/geometry/selection_routines.pxd @@ -12,8 +12,6 @@ from oct_visitors cimport Oct, OctVisitor from oct_container cimport OctreeContainer from grid_visitors cimport GridTreeNode, GridVisitorData, \ grid_visitor_function, check_child_masked -from yt.utilities.lib.ewah_bool_wrap cimport \ - BoolArrayCollection from yt.utilities.lib.geometry_utils cimport decode_morton_64bit from yt.utilities.lib.fp_utils cimport _ensure_code diff --git a/yt/geometry/selection_routines.pyx b/yt/geometry/selection_routines.pyx index 694ceb9448a..f4ba17b2365 100644 --- a/yt/geometry/selection_routines.pyx +++ b/yt/geometry/selection_routines.pyx @@ -23,9 +23,6 @@ from yt.utilities.lib.volume_container cimport \ from yt.utilities.lib.grid_traversal cimport \ sampler_function, walk_volume from yt.utilities.lib.bitarray cimport ba_get_value, ba_set_value -from yt.utilities.lib.ewah_bool_wrap cimport BoolArrayCollection -# from yt.utilities.lib.ewah_bool_wrap cimport SparseUnorderedBitmaskSet #as SparseUnorderedBitmask -# from yt.utilities.lib.ewah_bool_wrap cimport SparseUnorderedRefinedBitmaskSet #as SparseUnorderedRefinedBitmask from yt.utilities.lib.geometry_utils cimport encode_morton_64bit, decode_morton_64bit, \ bounded_morton_dds, morton_neighbors_coarse, morton_neighbors_refined diff --git a/yt/utilities/lib/ewah_bool_wrap.pxd b/yt/utilities/lib/ewah_bool_wrap.pxd index aa239d299d8..589d56a028c 100644 --- a/yt/utilities/lib/ewah_bool_wrap.pxd +++ b/yt/utilities/lib/ewah_bool_wrap.pxd @@ -1,10 +1,19 @@ cimport numpy as np +from libcpp.vector cimport vector +from libcpp.set cimport set as cset +from libcpp.pair cimport pair + +from yt.utilities.lib.ewah_bool_array cimport \ + sstream, ewah_map, ewah_bool_array, ewah_bool_iterator + +ctypedef bint bitarrtype +ctypedef pair[np.uint64_t, np.uint64_t] ind_pair cdef class FileBitmasks: cdef np.uint32_t nfiles - cdef void** ewah_coll - cdef void** ewah_keys - cdef void** ewah_refn + cdef ewah_map** ewah_coll + cdef ewah_bool_array** 
ewah_keys + cdef ewah_bool_array** ewah_refn cdef void _reset(self) cdef bint _iseq(self, FileBitmasks solf) @@ -43,10 +52,10 @@ cdef class FileBitmasks: cdef bint _check(self) cdef class BoolArrayCollection: - cdef void* ewah_coll - cdef void* ewah_keys - cdef void* ewah_refn - cdef void* ewah_coar + cdef ewah_map* ewah_coll + cdef ewah_bool_array* ewah_keys + cdef ewah_bool_array* ewah_refn + cdef ewah_bool_array* ewah_coar cdef void _reset(self) cdef int _richcmp(self, BoolArrayCollection solf, int op) except -1 @@ -85,9 +94,9 @@ cdef class BoolArrayCollection: cdef class BoolArrayCollectionUncompressed: cdef int nele1 cdef int nele2 - cdef void* ewah_coll - cdef void* ewah_keys - cdef void* ewah_refn + cdef ewah_map* ewah_coll + cdef bitarrtype* ewah_keys + cdef bitarrtype* ewah_refn cdef void _set(self, np.uint64_t i1, np.uint64_t i2=*) cdef void _set_coarse(self, np.uint64_t i1) @@ -108,7 +117,7 @@ cdef class BoolArrayCollectionUncompressed: cdef void _compress(self, BoolArrayCollection solf) cdef class SparseUnorderedBitmaskSet: - cdef void* entries + cdef cset[np.uint64_t] entries cdef void _set(self, np.uint64_t ind) cdef void _fill(self, np.uint8_t[:] mask) cdef void _fill_ewah(self, BoolArrayCollection mm) @@ -118,7 +127,7 @@ cdef class SparseUnorderedBitmaskSet: cdef class SparseUnorderedBitmaskVector: cdef int total - cdef void* entries + cdef vector[np.uint64_t] entries cdef void _set(self, np.uint64_t ind) cdef void _fill(self, np.uint8_t[:] mask) cdef void _fill_ewah(self, BoolArrayCollection mm) @@ -129,7 +138,7 @@ cdef class SparseUnorderedBitmaskVector: cdef void _prune(self) cdef class SparseUnorderedRefinedBitmaskSet: - cdef void* entries + cdef cset[ind_pair] entries cdef void _set(self, np.uint64_t ind1, np.uint64_t ind2) cdef void _fill(self, np.uint8_t[:] mask1, np.uint8_t[:]) cdef void _fill_ewah(self, BoolArrayCollection mm) @@ -139,7 +148,7 @@ cdef class SparseUnorderedRefinedBitmaskSet: cdef class SparseUnorderedRefinedBitmaskVector: 
cdef int total - cdef void* entries + cdef vector[ind_pair] entries cdef void _set(self, np.uint64_t ind1, np.uint64_t ind2) cdef void _fill(self, np.uint8_t[:] mask1, np.uint8_t[:]) cdef void _fill_ewah(self, BoolArrayCollection mm) diff --git a/yt/utilities/lib/ewah_bool_wrap.pyx b/yt/utilities/lib/ewah_bool_wrap.pyx index b4ced2c87bd..f25a785386e 100644 --- a/yt/utilities/lib/ewah_bool_wrap.pyx +++ b/yt/utilities/lib/ewah_bool_wrap.pyx @@ -8,14 +8,9 @@ Wrapper for EWAH Bool Array: https://github.com/lemire/EWAHBoolArray import struct from libcpp.map cimport map as cmap -from libcpp.vector cimport vector -from libcpp.pair cimport pair -from libcpp.set cimport set as cset from libcpp.map cimport map from libcpp.algorithm cimport sort from libc.stdlib cimport malloc, free, qsort -from yt.utilities.lib.ewah_bool_array cimport \ - sstream, ewah_map, ewah_bool_array, ewah_bool_iterator from cython.operator cimport dereference, preincrement import numpy as np cimport numpy as np @@ -29,12 +24,6 @@ cdef extern from "" namespace "std" nogil: cdef np.uint64_t FLAG = ~(0) cdef np.uint64_t MAX_VECTOR_SIZE = 1e7 -DEF UncompressedFormat = 'Pointer' - -#ctypedef np.uint8_t bitarrtype -ctypedef bint bitarrtype - -ctypedef pair[np.uint64_t, np.uint64_t] ind_pair ctypedef cmap[np.uint64_t, ewah_bool_array] ewahmap ctypedef cmap[np.uint64_t, ewah_bool_array].iterator ewahmap_it ctypedef pair[np.uint64_t, ewah_bool_array] ewahmap_p @@ -44,26 +33,20 @@ cdef class FileBitmasks: def __cinit__(self, np.uint32_t nfiles): cdef int i self.nfiles = nfiles - cdef ewah_bool_array **ewah_keys = malloc(nfiles*sizeof(ewah_bool_array*)) - cdef ewah_bool_array **ewah_refn = malloc(nfiles*sizeof(ewah_bool_array*)) - cdef ewah_map **ewah_coll = malloc(nfiles*sizeof(ewah_map*)) + self.ewah_keys = malloc(nfiles*sizeof(ewah_bool_array*)) + self.ewah_refn = malloc(nfiles*sizeof(ewah_bool_array*)) + self.ewah_coll = malloc(nfiles*sizeof(ewah_map*)) for i in range(nfiles): - ewah_keys[i] = new 
ewah_bool_array() - ewah_refn[i] = new ewah_bool_array() - ewah_coll[i] = new ewah_map() - self.ewah_keys = ewah_keys - self.ewah_refn = ewah_refn - self.ewah_coll = ewah_coll + self.ewah_keys[i] = new ewah_bool_array() + self.ewah_refn[i] = new ewah_bool_array() + self.ewah_coll[i] = new ewah_map() cdef void _reset(self): - cdef ewah_bool_array **ewah_keys = self.ewah_keys - cdef ewah_bool_array **ewah_refn = self.ewah_refn - cdef ewah_map **ewah_coll = self.ewah_coll cdef np.int32_t ifile for ifile in range(self.nfiles): - ewah_keys[ifile][0].reset() - ewah_refn[ifile][0].reset() - ewah_coll[ifile][0].clear() + self.ewah_keys[ifile].reset() + self.ewah_refn[ifile].reset() + self.ewah_coll[ifile].clear() cdef bint _iseq(self, FileBitmasks solf): cdef np.int32_t ifile @@ -644,24 +627,16 @@ cdef class FileBitmasks: cdef class BoolArrayCollection: def __cinit__(self): - cdef ewah_bool_array *ewah_keys = new ewah_bool_array() - cdef ewah_bool_array *ewah_refn = new ewah_bool_array() - cdef ewah_bool_array *ewah_coar = new ewah_bool_array() - cdef ewah_map *ewah_coll = new ewah_map() - self.ewah_keys = ewah_keys - self.ewah_refn = ewah_refn - self.ewah_coar = ewah_coar - self.ewah_coll = ewah_coll + self.ewah_keys = new ewah_bool_array() + self.ewah_refn = new ewah_bool_array() + self.ewah_coar = new ewah_bool_array() + self.ewah_coll = new ewah_map() cdef void _reset(self): - cdef ewah_bool_array *ewah_keys = self.ewah_keys - cdef ewah_bool_array *ewah_refn = self.ewah_refn - cdef ewah_bool_array *ewah_coar = self.ewah_coar - cdef ewah_map *ewah_coll = self.ewah_coll - ewah_keys[0].reset() - ewah_refn[0].reset() - ewah_coar[0].reset() - ewah_coll[0].clear() + self.ewah_keys[0].reset() + self.ewah_refn[0].reset() + self.ewah_coar[0].reset() + self.ewah_coll[0].clear() cdef int _richcmp(self, BoolArrayCollection solf, int op) except -1: @@ -1322,25 +1297,13 @@ cdef class BoolArrayCollectionUncompressed: def __cinit__(self, np.uint64_t nele1, np.uint64_t nele2): 
self.nele1 = nele1 self.nele2 = nele2 - cdef ewah_map *ewah_coll = new ewah_map() - self.ewah_coll = ewah_coll + self.ewah_coll = new ewah_map() cdef np.uint64_t i - IF UncompressedFormat == 'MemoryView': - self.ewah_keys = malloc(sizeof(bitarrtype)*nele1) - self.ewah_refn = malloc(sizeof(bitarrtype)*nele1) - cdef bitarrtype[:] ewah_keys = self.ewah_keys - cdef bitarrtype[:] ewah_refn = self.ewah_refn - for i in range(nele1): - ewah_keys[i] = 0 - ewah_refn[i] = 0 - ELIF UncompressedFormat == 'Pointer': - cdef bitarrtype *ewah_keys = malloc(sizeof(bitarrtype)*nele1) - cdef bitarrtype *ewah_refn = malloc(sizeof(bitarrtype)*nele1) - for i in range(nele1): - ewah_keys[i] = 0 - ewah_refn[i] = 0 - self.ewah_keys = ewah_keys - self.ewah_refn = ewah_refn + self.ewah_keys = malloc(sizeof(bitarrtype)*nele1) + self.ewah_refn = malloc(sizeof(bitarrtype)*nele1) + for i in range(nele1): + self.ewah_keys[i] = 0 + self.ewah_refn[i] = 0 def reset(self): self.__dealloc__() @@ -1350,12 +1313,8 @@ cdef class BoolArrayCollectionUncompressed: cdef np.uint64_t i cdef ewah_bool_array *ewah_keys = solf.ewah_keys cdef ewah_bool_array *ewah_refn = solf.ewah_refn - IF UncompressedFormat == 'MemoryView': - cdef bitarrtype[:] bool_keys = self.ewah_keys - cdef bitarrtype[:] bool_refn = self.ewah_refn - ELIF UncompressedFormat == 'Pointer': - cdef bitarrtype *bool_keys = self.ewah_keys - cdef bitarrtype *bool_refn = self.ewah_refn + cdef bitarrtype *bool_keys = self.ewah_keys + cdef bitarrtype *bool_refn = self.ewah_refn for i in range(self.nele1): if bool_keys[i] == 1: ewah_keys[0].set(i) @@ -1366,12 +1325,8 @@ cdef class BoolArrayCollectionUncompressed: ewah_coll2[0] = ewah_coll1[0] cdef void _set(self, np.uint64_t i1, np.uint64_t i2 = FLAG): - IF UncompressedFormat == 'MemoryView': - cdef bitarrtype[:] ewah_keys = self.ewah_keys - cdef bitarrtype[:] ewah_refn = self.ewah_refn - ELIF UncompressedFormat == 'Pointer': - cdef bitarrtype *ewah_keys = self.ewah_keys - cdef bitarrtype *ewah_refn = 
self.ewah_refn + cdef bitarrtype *ewah_keys = self.ewah_keys + cdef bitarrtype *ewah_refn = self.ewah_refn cdef ewah_map *ewah_coll = self.ewah_coll ewah_keys[i1] = 1 # Note the 0 here, for dereferencing @@ -1380,17 +1335,11 @@ cdef class BoolArrayCollectionUncompressed: ewah_coll[0][i1].set(i2) cdef void _set_coarse(self, np.uint64_t i1): - IF UncompressedFormat == 'MemoryView': - cdef bitarrtype[:] ewah_keys = self.ewah_keys - ELIF UncompressedFormat == 'Pointer': - cdef bitarrtype *ewah_keys = self.ewah_keys + cdef bitarrtype *ewah_keys = self.ewah_keys ewah_keys[i1] = 1 cdef void _set_refined(self, np.uint64_t i1, np.uint64_t i2): - IF UncompressedFormat == 'MemoryView': - cdef bitarrtype[:] ewah_refn = self.ewah_refn - ELIF UncompressedFormat == 'Pointer': - cdef bitarrtype *ewah_refn = self.ewah_refn + cdef bitarrtype *ewah_refn = self.ewah_refn cdef ewah_map *ewah_coll = self.ewah_coll # Note the 0 here, for dereferencing ewah_refn[i1] = 1 @@ -1401,10 +1350,7 @@ cdef class BoolArrayCollectionUncompressed: @cython.cdivision(True) @cython.initializedcheck(False) cdef void _set_coarse_array(self, np.uint8_t[:] arr): - IF UncompressedFormat == 'MemoryView': - cdef bitarrtype[:] ewah_keys = self.ewah_keys - ELIF UncompressedFormat == 'Pointer': - cdef bitarrtype *ewah_keys = self.ewah_keys + cdef bitarrtype *ewah_keys = self.ewah_keys cdef np.uint64_t i1 for i1 in range(arr.shape[0]): if arr[i1] == 1: @@ -1415,11 +1361,8 @@ cdef class BoolArrayCollectionUncompressed: @cython.cdivision(True) @cython.initializedcheck(False) cdef void _set_coarse_array_ptr(self, np.uint8_t *arr): - IF UncompressedFormat == 'MemoryView': - cdef bitarrtype[:] ewah_keys = self.ewah_keys - ELIF UncompressedFormat == 'Pointer': - # TODO: memcpy? - cdef bitarrtype *ewah_keys = self.ewah_keys + # TODO: memcpy? 
+ cdef bitarrtype *ewah_keys = self.ewah_keys cdef np.uint64_t i1 for i1 in range(self.nele1): if arr[i1] == 1: @@ -1430,10 +1373,7 @@ cdef class BoolArrayCollectionUncompressed: @cython.cdivision(True) @cython.initializedcheck(False) cdef void _set_refined_array(self, np.uint64_t i1, np.uint8_t[:] arr): - IF UncompressedFormat == 'MemoryView': - cdef bitarrtype[:] ewah_refn = self.ewah_refn - ELIF UncompressedFormat == 'Pointer': - cdef bitarrtype *ewah_refn = self.ewah_refn + cdef bitarrtype *ewah_refn = self.ewah_refn cdef ewah_map *ewah_coll = self.ewah_coll cdef np.uint64_t i2 for i2 in range(arr.shape[0]): @@ -1446,10 +1386,7 @@ cdef class BoolArrayCollectionUncompressed: @cython.cdivision(True) @cython.initializedcheck(False) cdef void _set_refined_array_ptr(self, np.uint64_t i1, np.uint8_t *arr): - IF UncompressedFormat == 'MemoryView': - cdef bitarrtype[:] ewah_refn = self.ewah_refn - ELIF UncompressedFormat == 'Pointer': - cdef bitarrtype *ewah_refn = self.ewah_refn + cdef bitarrtype *ewah_refn = self.ewah_refn cdef ewah_map *ewah_coll = self.ewah_coll cdef np.uint64_t i2 cdef ewah_bool_array *barr = &ewah_coll[0][i1] @@ -1463,19 +1400,12 @@ cdef class BoolArrayCollectionUncompressed: ewah_coll[0][i1].set(i2) cdef void _set_refn(self, np.uint64_t i1): - IF UncompressedFormat == 'MemoryView': - cdef bitarrtype[:] ewah_refn = self.ewah_refn - ELIF UncompressedFormat == 'Pointer': - cdef bitarrtype *ewah_refn = self.ewah_refn + cdef bitarrtype *ewah_refn = self.ewah_refn ewah_refn[i1] = 1 cdef bint _get(self, np.uint64_t i1, np.uint64_t i2 = FLAG): - IF UncompressedFormat == 'MemoryView': - cdef bitarrtype[:] ewah_keys = self.ewah_keys - cdef bitarrtype[:] ewah_refn = self.ewah_refn - ELIF UncompressedFormat == 'Pointer': - cdef bitarrtype *ewah_keys = self.ewah_keys - cdef bitarrtype *ewah_refn = self.ewah_refn + cdef bitarrtype *ewah_keys = self.ewah_keys + cdef bitarrtype *ewah_refn = self.ewah_refn cdef ewah_map *ewah_coll = self.ewah_coll # Note the 0 
here, for dereferencing if ewah_keys[i1] == 0: return 0 @@ -1484,26 +1414,17 @@ cdef class BoolArrayCollectionUncompressed: return ewah_coll[0][i1].get(i2) cdef bint _get_coarse(self, np.uint64_t i1): - IF UncompressedFormat == 'MemoryView': - cdef bitarrtype[:] ewah_keys = self.ewah_keys - ELIF UncompressedFormat == 'Pointer': - cdef bitarrtype *ewah_keys = self.ewah_keys + cdef bitarrtype *ewah_keys = self.ewah_keys return ewah_keys[i1] # if (ewah_keys[i1] == 0): return 0 # return 1 cdef bint _isref(self, np.uint64_t i): - IF UncompressedFormat == 'MemoryView': - cdef bitarrtype[:] ewah_refn = self.ewah_refn - ELIF UncompressedFormat == 'Pointer': - cdef bitarrtype *ewah_refn = self.ewah_refn + cdef bitarrtype *ewah_refn = self.ewah_refn return ewah_refn[i] cdef int _count_total(self): - IF UncompressedFormat == 'MemoryView': - cdef bitarrtype[:] ewah_keys = self.ewah_keys - ELIF UncompressedFormat == 'Pointer': - cdef bitarrtype *ewah_keys = self.ewah_keys + cdef bitarrtype *ewah_keys = self.ewah_keys cdef np.uint64_t i cdef int out = 0 for i in range(self.nele1): @@ -1511,10 +1432,7 @@ cdef class BoolArrayCollectionUncompressed: return out cdef int _count_refined(self): - IF UncompressedFormat == 'MemoryView': - cdef bitarrtype[:] ewah_refn = self.ewah_refn - ELIF UncompressedFormat == 'Pointer': - cdef bitarrtype *ewah_refn = self.ewah_refn + cdef bitarrtype *ewah_refn = self.ewah_refn cdef np.uint64_t i cdef int out = 0 for i in range(self.nele1): @@ -1522,16 +1440,10 @@ cdef class BoolArrayCollectionUncompressed: return out cdef void _append(self, BoolArrayCollectionUncompressed solf): - IF UncompressedFormat == 'MemoryView': - cdef bitarrtype[:] ewah_keys1 = self.ewah_keys - cdef bitarrtype[:] ewah_refn1 = self.ewah_refn - cdef bitarrtype[:] ewah_keys2 = solf.ewah_keys - cdef bitarrtype[:] ewah_refn2 = solf.ewah_refn - ELIF UncompressedFormat == 'Pointer': - cdef bitarrtype *ewah_keys1 = self.ewah_keys - cdef bitarrtype *ewah_refn1 = self.ewah_refn - cdef 
bitarrtype *ewah_keys2 = solf.ewah_keys - cdef bitarrtype *ewah_refn2 = solf.ewah_refn + cdef bitarrtype *ewah_keys1 = self.ewah_keys + cdef bitarrtype *ewah_refn1 = self.ewah_refn + cdef bitarrtype *ewah_keys2 = solf.ewah_keys + cdef bitarrtype *ewah_refn2 = solf.ewah_refn cdef ewahmap *ewah_coll1 = self.ewah_coll cdef ewahmap *ewah_coll2 = solf.ewah_coll cdef ewahmap_it it_map1, it_map2 @@ -1561,16 +1473,10 @@ cdef class BoolArrayCollectionUncompressed: preincrement(it_map2) cdef bint _intersects(self, BoolArrayCollectionUncompressed solf): - IF UncompressedFormat == 'MemoryView': - cdef bitarrtype[:] ewah_keys1 = self.ewah_keys - cdef bitarrtype[:] ewah_refn1 = self.ewah_refn - cdef bitarrtype[:] ewah_keys2 = solf.ewah_keys - cdef bitarrtype[:] ewah_refn2 = solf.ewah_refn - ELIF UncompressedFormat == 'Pointer': - cdef bitarrtype *ewah_keys1 = self.ewah_keys - cdef bitarrtype *ewah_refn1 = self.ewah_refn - cdef bitarrtype *ewah_keys2 = solf.ewah_keys - cdef bitarrtype *ewah_refn2 = solf.ewah_refn + cdef bitarrtype *ewah_keys1 = self.ewah_keys + cdef bitarrtype *ewah_refn1 = self.ewah_refn + cdef bitarrtype *ewah_keys2 = solf.ewah_keys + cdef bitarrtype *ewah_refn2 = solf.ewah_refn cdef ewahmap *ewah_coll1 = self.ewah_coll cdef ewahmap *ewah_coll2 = solf.ewah_coll cdef ewahmap_it it_map1, it_map2 @@ -1602,14 +1508,10 @@ cdef class BoolArrayCollectionUncompressed: return 0 def __dealloc__(self): - IF UncompressedFormat == 'MemoryView': - free(self.ewah_keys) - free(self.ewah_refn) - ELIF UncompressedFormat == 'Pointer': - cdef bitarrtype *ewah_keys = self.ewah_keys - cdef bitarrtype *ewah_refn = self.ewah_refn - free(ewah_keys) - free(ewah_refn) + cdef bitarrtype *ewah_keys = self.ewah_keys + cdef bitarrtype *ewah_refn = self.ewah_refn + free(ewah_keys) + free(ewah_refn) cdef ewah_map *ewah_coll = self.ewah_coll del ewah_coll @@ -1626,13 +1528,10 @@ cdef class BoolArrayCollectionUncompressed: # Vector version cdef class SparseUnorderedBitmaskVector: def 
__cinit__(self): - cdef vector[np.uint64_t] *entries = new vector[np.uint64_t]() - self.entries = entries self.total = 0 cdef void _set(self, np.uint64_t ind): - cdef vector[np.uint64_t] *entries = self.entries - entries[0].push_back(ind) + self.entries.push_back(ind) self.total += 1 def set(self, ind): @@ -1640,47 +1539,41 @@ cdef class SparseUnorderedBitmaskVector: cdef void _fill(self, np.uint8_t[:] mask): cdef np.uint64_t i, ind - cdef vector[np.uint64_t] *entries = self.entries - for i in range(entries[0].size()): - ind = entries[0][i] + for i in range(self.entries.size()): + ind = self.entries[i] mask[ind] = 1 cdef void _fill_ewah(self, BoolArrayCollection mm): self._remove_duplicates() cdef np.uint64_t i, ind - cdef vector[np.uint64_t] *entries = self.entries - for i in range(entries[0].size()): - ind = entries[0][i] + for i in range(self.entries.size()): + ind = self.entries[i] mm._set_coarse(ind) cdef void _fill_bool(self, BoolArrayCollectionUncompressed mm): self._remove_duplicates() cdef np.uint64_t i, ind - cdef vector[np.uint64_t] *entries = self.entries - for i in range(entries[0].size()): - ind = entries[0][i] + for i in range(self.entries.size()): + ind = self.entries[i] mm._set_coarse(ind) cdef void _reset(self): - cdef vector[np.uint64_t] *entries = self.entries - entries[0].erase(entries[0].begin(), entries[0].end()) + self.entries.erase(self.entries.begin(), self.entries.end()) self.total = 0 cdef to_array(self): self._remove_duplicates() cdef np.ndarray[np.uint64_t, ndim=1] rv - cdef vector[np.uint64_t] *entries = self.entries - rv = np.empty(entries[0].size(), dtype='uint64') - for i in range(entries[0].size()): - rv[i] = entries[0][i] + rv = np.empty(self.entries.size(), dtype='uint64') + for i in range(self.entries.size()): + rv[i] = self.entries[i] return rv cdef void _remove_duplicates(self): - cdef vector[np.uint64_t] *entries = self.entries cdef vector[np.uint64_t].iterator last - sort(entries[0].begin(), entries[0].end()) - last = 
unique(entries[0].begin(), entries[0].end()) - entries[0].erase(last, entries[0].end()) + sort(self.entries.begin(), self.entries.end()) + last = unique(self.entries.begin(), self.entries.end()) + self.entries.erase(last, self.entries.end()) cdef void _prune(self): if self.total > MAX_VECTOR_SIZE: @@ -1688,50 +1581,39 @@ cdef class SparseUnorderedBitmaskVector: self.total = 0 def __dealloc__(self): - cdef vector[np.uint64_t] *entries = self.entries - del entries + self.entries.clear() # Set version cdef class SparseUnorderedBitmaskSet: - def __cinit__(self): - cdef cset[np.uint64_t] *entries = new cset[np.uint64_t]() - self.entries = entries - cdef void _set(self, np.uint64_t ind): - cdef cset[np.uint64_t] *entries = self.entries - entries[0].insert(ind) + self.entries.insert(ind) def set(self, ind): self._set(ind) cdef void _fill(self, np.uint8_t[:] mask): - cdef cset[np.uint64_t] *entries = self.entries - for it in entries[0]: + for it in self.entries: mask[it] = 1 cdef void _fill_ewah(self, BoolArrayCollection mm): - cdef cset[np.uint64_t] *entries = self.entries - for it in entries[0]: + for it in self.entries: mm._set_coarse(it) cdef void _fill_bool(self, BoolArrayCollectionUncompressed mm): - cdef cset[np.uint64_t] *entries = self.entries - for it in entries[0]: + for it in self.entries: mm._set_coarse(it) cdef void _reset(self): - cdef cset[np.uint64_t] *entries = self.entries - entries[0].clear() + self.entries.clear() cdef to_array(self): cdef np.uint64_t ind cdef np.ndarray[np.uint64_t, ndim=1] rv - cdef cset[np.uint64_t] *entries = self.entries cdef cset[np.uint64_t].iterator it - rv = np.empty(entries[0].size(), dtype='uint64') - it = entries[0].begin() + rv = np.empty(self.entries.size(), dtype='uint64') + it = self.entries.begin() i = 0 - while it != entries[0].end(): + while it != self.entries.end(): ind = dereference(it) rv[i] = ind preincrement(it) @@ -1739,69 +1621,58 @@ cdef class SparseUnorderedBitmaskSet: return rv def __dealloc__(self): - cdef 
cset[np.uint64_t] *entries = self.entries - del entries + self.entries.clear() # vector version cdef class SparseUnorderedRefinedBitmaskVector: def __cinit__(self): - cdef vector[ind_pair] *entries = new vector[ind_pair]() - self.entries = entries self.total = 0 cdef void _set(self, np.uint64_t ind1, np.uint64_t ind2): cdef ind_pair ind - cdef vector[ind_pair] *entries = self.entries ind.first = ind1 ind.second = ind2 - entries[0].push_back(ind) + self.entries.push_back(ind) self.total += 1 - def set(self, ind1, ind2): self._set(ind1, ind2) cdef void _fill(self, np.uint8_t[:] mask1, np.uint8_t[:] mask2): - cdef vector[ind_pair] *entries = self.entries - for it in entries[0]: + for it in self.entries: mask1[it.first] = mask2[it.second] = 1 cdef void _fill_ewah(self, BoolArrayCollection mm): self._remove_duplicates() - cdef vector[ind_pair] *entries = self.entries - for it in entries[0]: + for it in self.entries: mm._set_refined(it.first, it.second) cdef void _fill_bool(self, BoolArrayCollectionUncompressed mm): self._remove_duplicates() - cdef vector[ind_pair] *entries = self.entries - for it in entries[0]: + for it in self.entries: mm._set_refined(it.first, it.second) cdef void _reset(self): - cdef vector[ind_pair] *entries = self.entries - entries[0].erase(entries[0].begin(), entries[0].end()) + self.entries.erase(self.entries.begin(), self.entries.end()) self.total = 0 cdef to_array(self): cdef int i cdef np.ndarray[np.uint64_t, ndim=2] rv self._remove_duplicates() - cdef vector[ind_pair] *entries = self.entries - rv = np.empty((entries[0].size(),2),dtype='uint64') + rv = np.empty((self.entries.size(),2),dtype='uint64') i = 0 - for it in entries[0]: + for it in self.entries: rv[i,0] = it.first rv[i,1] = it.second i += 1 return rv cdef void _remove_duplicates(self): - cdef vector[ind_pair] *entries = self.entries cdef vector[ind_pair].iterator last - sort(entries[0].begin(), entries[0].end()) - last = unique(entries[0].begin(), entries[0].end()) - 
entries[0].erase(last, entries[0].end()) + sort(self.entries.begin(), self.entries.end()) + last = unique(self.entries.begin(), self.entries.end()) + self.entries.erase(last, self.entries.end()) # http://stackoverflow.com/questions/16970982/find-unique-rows-in-numpy-array # cdef np.ndarray[np.uint64_t, ndim=2] rv # cdef np.ndarray[np.uint64_t, ndim=2] rv_uni @@ -1830,57 +1701,44 @@ cdef class SparseUnorderedRefinedBitmaskVector: self.total = 0 def __dealloc__(self): - cdef vector[ind_pair] *entries = self.entries - del entries + self.entries.clear() # Set version cdef class SparseUnorderedRefinedBitmaskSet: - def __cinit__(self): - cdef cset[ind_pair] *entries = new cset[ind_pair]() - self.entries = entries - cdef void _set(self, np.uint64_t ind1, np.uint64_t ind2): cdef ind_pair ind - cdef cset[ind_pair] *entries = self.entries ind.first = ind1 ind.second = ind2 - entries[0].insert(ind) + self.entries.insert(ind) def set(self, ind1, ind2): self._set(ind1, ind2) cdef void _fill(self, np.uint8_t[:] mask1, np.uint8_t[:] mask2): - cdef cset[ind_pair] *entries = self.entries - for p in entries[0]: + for p in self.entries: mask1[p.first] = mask2[p.second] = 1 cdef void _fill_ewah(self, BoolArrayCollection mm): - cdef cset[ind_pair] *entries = self.entries - for it in entries[0]: + for it in self.entries: mm._set_refined(it.first, it.second) cdef void _fill_bool(self, BoolArrayCollectionUncompressed mm): - cdef cset[ind_pair] *entries = self.entries - for it in entries[0]: + for it in self.entries: mm._set_refined(it.first, it.second) cdef void _reset(self): - cdef cset[ind_pair] *entries = self.entries - entries[0].clear() + self.entries.clear() cdef to_array(self): cdef int i cdef np.ndarray[np.uint64_t, ndim=2] rv - cdef cset[ind_pair] *entries = self.entries - rv = np.empty((entries[0].size(),2),dtype='uint64') + rv = np.empty((self.entries.size(),2),dtype='uint64') i = 0 - for it in entries[0]: + for it in self.entries: rv[i,0] = it.first rv[i,1] = it.second i += 1 
return rv def __dealloc__(self): - cdef cset[ind_pair] *entries = self.entries - del entries - + self.entries.clear() From 2164393663e61547b10e413a5be70ed77d638a46 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Thu, 21 May 2020 15:50:03 -0500 Subject: [PATCH 22/42] Intermediate commit on way to working --- yt/geometry/particle_geometry_handler.py | 22 ++++- yt/geometry/particle_oct_container.pyx | 101 +++++++++++++++++------ yt/utilities/lib/ewah_bool_wrap.pyx | 12 +-- 3 files changed, 99 insertions(+), 36 deletions(-) diff --git a/yt/geometry/particle_geometry_handler.py b/yt/geometry/particle_geometry_handler.py index dd12a47e7af..ae2830ecde3 100644 --- a/yt/geometry/particle_geometry_handler.py +++ b/yt/geometry/particle_geometry_handler.py @@ -166,24 +166,42 @@ def _initialize_refined_index(self): for d in self.data_files) * 28 sub_mi1 = np.zeros(max_npart, "uint64") sub_mi2 = np.zeros(max_npart, "uint64") + mi1_dds = self.ds.domain_width.max() / (1 << self.regions.index_order1) + mi2_dds = mi1_dds / (1 << self.regions.index_order2) pb = get_pbar("Initializing refined index", len(self.data_files)) + count_threshold = getattr(self, '_index_count_threshold', + (1 << (3*self.regions.index_order2))/512) + total_refined = 0 + total_coarse_refined = ((mask >= 2) & (self.regions.particle_counts > count_threshold)).sum() + print("Total coarse refined zones: {} out of {} for {}%".format( + total_coarse_refined, mask.size, 100 * total_coarse_refined / mask.size)) for i, data_file in enumerate(self.data_files): pb.update(i) nsub_mi = 0 for ptype, pos in self.io._yield_coordinates(data_file): + print(i, ptype, pos.shape) + if pos.size == 0: continue if hasattr(self.ds, '_sph_ptypes') and ptype == self.ds._sph_ptypes[0]: hsml = self.io._get_smoothing_length( data_file, pos.dtype, pos.shape) + print("Has smoothing length: max coverage of %0.3e %0.3e and min coverage of %0.3e %0.3e" % ( + hsml.max() / mi1_dds, hsml.max() / mi2_dds, + hsml.min() / mi1_dds, hsml.min() / 
mi2_dds)) else: hsml = None + #hsml = None nsub_mi = self.regions._refined_index_data_file( pos, hsml, mask, sub_mi1, sub_mi2, - data_file.file_id, nsub_mi) + data_file.file_id, nsub_mi, count_threshold = count_threshold, + mask_threshold = 2) + total_refined += nsub_mi + continue self.regions._set_refined_index_data_file( sub_mi1, sub_mi2, data_file.file_id, nsub_mi) pb.finish() - self.regions.find_collisions_refined() + print("TOTAL REFINED", total_refined) + #self.regions.find_collisions_refined() def _detect_output_fields(self): # TODO: Add additional fields diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index b8409d64b0d..c12f334c0a6 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -12,6 +12,7 @@ from oct_container cimport OctreeContainer, Oct, OctInfo, ORDER_MAX, \ cimport oct_visitors from oct_visitors cimport cind, OctVisitor from libc.stdlib cimport malloc, free, qsort +from libc.string cimport memset from libc.math cimport floor, ceil, fmod from yt.utilities.lib.fp_utils cimport * from yt.utilities.lib.geometry_utils cimport bounded_morton, \ @@ -55,7 +56,7 @@ from ..utilities.lib.ewah_bool_wrap cimport SparseUnorderedRefinedBitmaskSet as from ..utilities.lib.ewah_bool_wrap cimport BoolArrayCollectionUncompressed as BoolArrayColl from ..utilities.lib.ewah_bool_wrap cimport FileBitmasks -ctypedef map[np.uint64_t, vector[bool]] CoarseRefinedSets +ctypedef map[np.uint64_t, np.uint8_t*] CoarseRefinedSets cdef class ParticleOctreeContainer(OctreeContainer): cdef Oct** oct_list @@ -414,6 +415,7 @@ cdef class ParticleBitmap: cdef public np.int32_t index_order1 cdef public np.int32_t index_order2 cdef public object masks + cdef public object particle_counts cdef public object counts cdef public object max_count cdef public object _last_selector @@ -458,6 +460,7 @@ cdef class ParticleBitmap: # by particles. # This is the simple way, for now. 
self.masks = np.zeros((1 << (index_order1 * 3), nfiles), dtype="uint8") + self.particle_counts = np.zeros(1 << (index_order1 * 3), dtype="uint64") self.bitmasks = FileBitmasks(self.nfiles) self.collisions = BoolArrayCollection() @@ -499,6 +502,7 @@ cdef class ParticleBitmap: cdef np.float64_t dds[3] cdef np.float64_t radius cdef np.uint8_t[:] mask = self.masks[:, file_id] + cdef np.uint64_t[:] particle_counts = self.particle_counts cdef np.int64_t msize = (1 << (self.index_order1 * 3)) cdef int axiter[3][2] cdef np.float64_t axiterv[3][2] @@ -526,6 +530,7 @@ cdef class ParticleBitmap: mi = bounded_morton_split_dds(ppos[0], ppos[1], ppos[2], LE, dds, mi_split) mask[mi] = 1 + particle_counts[mi] += 1 # Expand mask by softening if hsml is None: continue @@ -566,6 +571,7 @@ cdef class ParticleBitmap: for zex in range(bounds[2][0], bounds[2][1]): miex = encode_morton_64bit(xex, yex, zex) mask[miex] = 1 + particle_counts[miex] += 1 if miex >= msize: raise IndexError( "Index for a softening region " + @@ -600,10 +606,13 @@ cdef class ParticleBitmap: np.ndarray[np.uint8_t, ndim=1] mask, np.ndarray[np.uint64_t, ndim=1] sub_mi1, np.ndarray[np.uint64_t, ndim=1] sub_mi2, - np.uint64_t file_id, np.int64_t nsub_mi): + np.uint64_t file_id, np.int64_t nsub_mi, + np.uint64_t count_threshold = 128, + np.uint8_t mask_threshold = 2): return self.__refined_index_data_file(pos, hsml, mask, sub_mi1, sub_mi2, - file_id, nsub_mi) + file_id, nsub_mi, + count_threshold, mask_threshold) @cython.boundscheck(False) @cython.wraparound(False) @@ -616,15 +625,17 @@ cdef class ParticleBitmap: np.ndarray[np.uint8_t, ndim=1] mask, np.ndarray[np.uint64_t, ndim=1] sub_mi1, np.ndarray[np.uint64_t, ndim=1] sub_mi2, - np.uint64_t file_id, np.int64_t nsub_mi + np.uint64_t file_id, np.int64_t nsub_mi, + np.uint64_t count_threshold, np.uint8_t mask_threshold ) except -1: # Initialize - cdef np.int64_t i, p + cdef np.int64_t i, p, sorted_ind cdef np.uint64_t mi1, mi2 cdef np.float64_t ppos[3] cdef 
np.float64_t s_ppos[3] # shifted ppos cdef int skip, Nex cdef np.uint64_t bounds[2][3] + cdef np.uint8_t fully_enclosed cdef np.float64_t LE[3] cdef np.float64_t RE[3] cdef np.float64_t DW[3] @@ -635,6 +646,7 @@ cdef class ParticleBitmap: cdef np.uint64_t mi_split1[3] cdef np.uint64_t mi_split2[3] cdef np.uint64_t miex1, miex2, mi1_max, mi2_max + cdef np.uint64_t[:] particle_counts = self.particle_counts cdef int Nex_min[3] cdef int Nex_max[3] cdef np.float64_t rpos_min, rpos_max @@ -655,7 +667,7 @@ cdef class ParticleBitmap: cdef np.float64_t axiterv[3][2] cdef CoarseRefinedSets coarse_refined_map cdef map[np.uint64_t, np.uint64_t] refined_count - cdef np.uint64_t nset = 0 + cdef np.uint64_t nset = 0, nfully_enclosed = 0, n_calls = 0 mi1_max = (1 << self.index_order1) - 1 mi2_max = (1 << self.index_order2) - 1 cdef np.uint64_t max_mi2_elements = 1 << (3*self.index_order2) @@ -669,8 +681,18 @@ cdef class ParticleBitmap: DW[i] = RE[i] - LE[i] axiter[i][0] = 0 # We always do an offset of 0 axiterv[i][0] = 0.0 - # Loop over positions skipping those outside the domain + cdef np.ndarray[np.uint64_t, ndim=1] morton_indices = np.empty(pos.shape[0], dtype="u8") for p in range(pos.shape[0]): + morton_indices[p] = bounded_morton(pos[p, 0], pos[p, 1], pos[p, 2], + LE, RE, self.index_order1) + # Loop over positions skipping those outside the domain + cdef np.ndarray[np.uint64_t, ndim=1, cast=True] sorted_order + if hsml is None: + sorted_order = np.argsort(morton_indices) + else: + sorted_order = np.argsort(hsml)[::-1] + for sorted_ind in range(sorted_order.shape[0]): + p = sorted_order[sorted_ind] skip = 0 for i in range(3): axiter[i][1] = 999 @@ -683,14 +705,16 @@ cdef class ParticleBitmap: mi1 = bounded_morton_split_dds(ppos[0], ppos[1], ppos[2], LE, dds1, mi_split1) if hsml is None: - if mask[mi1] <= 1: # only one thing in this area + if mask[mi1] < mask_threshold \ + or particle_counts[mi1] < count_threshold: continue # Determine sub index within cell of primary index mi2 
= bounded_morton_split_relative_dds( ppos[0], ppos[1], ppos[2], LE, dds1, dds2, mi_split2) - if coarse_refined_map[mi1].size() == 0: - coarse_refined_map[mi1].resize(max_mi2_elements, False) - refined_count[mi1] = 0 + if refined_count[mi1] == 0: + coarse_refined_map[mi1] = malloc( + sizeof(np.uint8_t) * max_mi2_elements) + memset(coarse_refined_map[mi1], 0, max_mi2_elements) if coarse_refined_map[mi1][mi2] == False: coarse_refined_map[mi1][mi2] = True refined_count[mi1] += 1 @@ -699,8 +723,8 @@ cdef class ParticleBitmap: # except here we need to fill in all the subranges as well as the coarse ranges # Note that we are also doing the null case, where we do no shifting radius = hsml[p] - if mask[mi1] <= 1: # only one thing in this area - continue + #if mask[mi1] <= 4: # only one thing in this area + # continue for i in range(3): if PER[i] and ppos[i] - radius < LE[i]: axiter[i][1] = +1 @@ -720,25 +744,47 @@ cdef class ParticleBitmap: # OK, now we compute the left and right edges for this shift. for i in range(3): # casting to int64 is not nice but is so we can have negative values we clip - clip_pos_l[i] = fmax(s_ppos[i] - radius, LE[i] + dds1[i]/2) - clip_pos_r[i] = fmin(s_ppos[i] + radius, RE[i] - dds1[i]/2) + clip_pos_l[i] = fmax(s_ppos[i] - radius, LE[i] + dds1[i]/10) + clip_pos_r[i] = fmin(s_ppos[i] + radius, RE[i] - dds1[i]/10) + bounded_morton_split_dds(clip_pos_l[0], clip_pos_l[1], clip_pos_l[2], LE, dds1, bounds[0]) bounded_morton_split_dds(clip_pos_r[0], clip_pos_r[1], clip_pos_r[2], LE, dds1, bounds[1]) + # We go to the upper bound plus one so that we have *inclusive* loops -- the upper bound # is the cell *index*, so we want to make sure we include that cell. This is also why # we don't need to worry about mi_max being the max index rather than the cell count. + # One additional thing to note is that for all of + # the *internal* cells, i.e., those that are both + # greater than the left edge and less than the + # right edge, we are fully enclosed. 
for xex in range(bounds[0][0], bounds[1][0] + 1): for yex in range(bounds[0][1], bounds[1][1] + 1): for zex in range(bounds[0][2], bounds[1][2] + 1): miex1 = encode_morton_64bit(xex, yex, zex) - if mask[miex1] <= 1: + if mask[miex1] < mask_threshold or \ + particle_counts[miex1] < count_threshold: continue + # this explicitly requires that it be *between* + # them, not overlapping + if xex > bounds[0][0] and xex < bounds[1][0] and \ + yex > bounds[0][1] and yex < bounds[1][1] and \ + zex > bounds[0][2] and zex < bounds[1][2]: + fully_enclosed = 1 + else: + fully_enclosed = 0 # Now we need to fill our sub-range - if coarse_refined_map[miex1].size() == 0: - coarse_refined_map[miex1].resize(max_mi2_elements, False) - refined_count[miex1] = 0 - if refined_count[miex1] >= max_mi2_elements: + if refined_count[miex1] == 0: + coarse_refined_map[miex1] = malloc( + sizeof(np.uint8_t) * max_mi2_elements) + memset(coarse_refined_map[miex1], 0, max_mi2_elements) + elif refined_count[miex1] >= max_mi2_elements: + continue + if fully_enclosed == 1: + nfully_enclosed += 1 + memset(coarse_refined_map[miex1], 0xFF, max_mi2_elements) + refined_count[miex1] = max_mi2_elements continue + n_calls += 1 refined_count[miex1] += self.__fill_refined_ranges(s_ppos, radius, LE, RE, dds1, xex, yex, zex, dds2, mi1_max, mi2_max, miex1, @@ -746,18 +792,21 @@ cdef class ParticleBitmap: max_mi2_elements) print("THIS MANY COARSE CELLS", coarse_refined_map.size()) print("THIS MANY NSET", nset, nset / pos.shape[0], nsub_mi) + if n_calls > 0: + print("THIS MANY TERMINATIONS AND THIS MANY CALLS", nfully_enclosed, n_calls, nfully_enclosed / n_calls) cdef np.uint64_t count, vec_i cdef np.uint64_t total_count = 0 for it1 in coarse_refined_map: mi1 = it1.first count = 0 vec_i = 0 - for vec_i in range(it1.second.size()): - if it1.second[vec_i] == True: + for vec_i in range(max_mi2_elements): + if it1.second[vec_i] > 0: count += 1 #sub_mi1[nsub_mi] = mi1 #sub_mi2[nsub_mi] = vec_i nsub_mi += 1 + 
free(coarse_refined_map[mi1]) if count != refined_count[mi1]: print("WHY IS THIS WRONG", count, refined_count[mi1]) #print("IN ", mi1, "THIS MANY REFINED CELLS", count) @@ -922,7 +971,7 @@ cdef class ParticleBitmap: np.float64_t dds1[3], np.uint64_t xex, np.uint64_t yex, np.uint64_t zex, np.float64_t dds2[3], np.uint64_t mi1_max, np.uint64_t mi2_max, np.uint64_t miex1, - vector[bool] &refined_set, np.float64_t ppos[3], np.uint64_t mcount, + np.uint8_t *refined_set, np.float64_t ppos[3], np.uint64_t mcount, np.uint64_t max_mi2_elements) except -1: cdef int i cdef np.uint64_t new_nsub = 0 @@ -938,6 +987,8 @@ cdef class ParticleBitmap: # full domain cell_edge_l = ex1[i] * dds1[i] + LE[i] cell_edge_r = cell_edge_l + dds1[i] + if s_ppos[i] + radius < cell_edge_l or s_ppos[i] - radius > cell_edge_r: + return 0 clip_pos_l[i] = fmax(s_ppos[i] - radius, cell_edge_l + dds2[i]/2.0) clip_pos_r[i] = fmin(s_ppos[i] + radius, cell_edge_r - dds2[i]/2.0) miex2_min = bounded_morton_split_relative_dds(clip_pos_l[0], clip_pos_l[1], clip_pos_l[2], @@ -951,14 +1002,14 @@ cdef class ParticleBitmap: #miex2 = encode_morton_64bit(xex2, yex2, zex2) #decode_morton_64bit(miex2, ex2) # Let's check all our cases here - if refined_set[miex2] == True: continue + if refined_set[miex2] > 0: continue if (miex2 & xex_max) < (miex2_min & xex_max): continue if (miex2 & yex_max) < (miex2_min & yex_max): continue if (miex2 & zex_max) < (miex2_min & zex_max): continue if (miex2 & xex_max) > (miex2_max & xex_max): continue if (miex2 & yex_max) > (miex2_max & yex_max): continue if (miex2 & zex_max) > (miex2_max & zex_max): continue - refined_set[miex2] = True + refined_set[miex2] = 1 new_nsub += 1 return new_nsub diff --git a/yt/utilities/lib/ewah_bool_wrap.pyx b/yt/utilities/lib/ewah_bool_wrap.pyx index f25a785386e..cf96862718f 100644 --- a/yt/utilities/lib/ewah_bool_wrap.pyx +++ b/yt/utilities/lib/ewah_bool_wrap.pyx @@ -606,16 +606,10 @@ cdef class FileBitmasks: return self._check() def __dealloc__(self): 
- cdef ewah_bool_array *ewah_keys - cdef ewah_bool_array *ewah_refn - cdef ewah_map *ewah_coll for ifile in range(self.nfiles): - ewah_keys = ( self.ewah_keys)[ifile] - ewah_refn = ( self.ewah_refn)[ifile] - ewah_coll = ( self.ewah_coll)[ifile] - del ewah_keys - del ewah_refn - del ewah_coll + del self.ewah_keys[ifile] + del self.ewah_refn[ifile] + del self.ewah_coll[ifile] def print_info(self, ifile, prefix=''): print("{}{: 8d} coarse, {: 8d} refined, {: 8d} total".format( From e89bd594d228665bb1d40d9d346047dae2f114e4 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Thu, 21 May 2020 16:30:13 -0500 Subject: [PATCH 23/42] Keep a semi-running tally of bool array collections --- yt/geometry/particle_geometry_handler.py | 23 +-- yt/geometry/particle_oct_container.pyx | 189 +++-------------------- yt/utilities/lib/ewah_bool_wrap.pyx | 3 + 3 files changed, 39 insertions(+), 176 deletions(-) diff --git a/yt/geometry/particle_geometry_handler.py b/yt/geometry/particle_geometry_handler.py index ae2830ecde3..61aaf6d9e4a 100644 --- a/yt/geometry/particle_geometry_handler.py +++ b/yt/geometry/particle_geometry_handler.py @@ -169,13 +169,16 @@ def _initialize_refined_index(self): mi1_dds = self.ds.domain_width.max() / (1 << self.regions.index_order1) mi2_dds = mi1_dds / (1 << self.regions.index_order2) pb = get_pbar("Initializing refined index", len(self.data_files)) + mask_threshold = getattr(self, '_index_mask_threshold', 2) count_threshold = getattr(self, '_index_count_threshold', - (1 << (3*self.regions.index_order2))/512) + (1 << (3*self.regions.index_order2))/128) + print("Count threshold ", count_threshold) total_refined = 0 total_coarse_refined = ((mask >= 2) & (self.regions.particle_counts > count_threshold)).sum() print("Total coarse refined zones: {} out of {} for {}%".format( total_coarse_refined, mask.size, 100 * total_coarse_refined / mask.size)) for i, data_file in enumerate(self.data_files): + coll = None pb.update(i) nsub_mi = 0 for ptype, pos in 
self.io._yield_coordinates(data_file): @@ -190,18 +193,18 @@ def _initialize_refined_index(self): else: hsml = None #hsml = None - nsub_mi = self.regions._refined_index_data_file( - pos, hsml, mask, sub_mi1, sub_mi2, + nsub_mi, coll = self.regions._refined_index_data_file( + coll, pos, hsml, mask, sub_mi1, sub_mi2, data_file.file_id, nsub_mi, count_threshold = count_threshold, - mask_threshold = 2) + mask_threshold = mask_threshold) total_refined += nsub_mi - continue - self.regions._set_refined_index_data_file( - sub_mi1, sub_mi2, - data_file.file_id, nsub_mi) + self.regions.bitmasks.append(data_file.file_id, coll) + #self.regions._set_refined_index_data_file( + # sub_mi1, sub_mi2, + # data_file.file_id, nsub_mi) pb.finish() - print("TOTAL REFINED", total_refined) - #self.regions.find_collisions_refined() + #print("TOTAL REFINED", total_refined) + self.regions.find_collisions_refined() def _detect_output_fields(self): # TODO: Add additional fields diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index c12f334c0a6..a760e6450c0 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -428,7 +428,7 @@ cdef class ParticleBitmap: cdef np.uint32_t *file_markers cdef np.uint64_t n_file_markers cdef np.uint64_t file_marker_i - cdef FileBitmasks bitmasks + cdef public FileBitmasks bitmasks cdef public BoolArrayCollection collisions def __init__(self, left_edge, right_edge, periodicity, file_hash, nfiles, @@ -601,6 +601,7 @@ cdef class ParticleBitmap: @cython.cdivision(True) @cython.initializedcheck(False) def _refined_index_data_file(self, + BoolArrayCollection in_collection, np.ndarray[floating, ndim=2] pos, np.ndarray[floating, ndim=1] hsml, np.ndarray[np.uint8_t, ndim=1] mask, @@ -609,31 +610,38 @@ cdef class ParticleBitmap: np.uint64_t file_id, np.int64_t nsub_mi, np.uint64_t count_threshold = 128, np.uint8_t mask_threshold = 2): - return self.__refined_index_data_file(pos, hsml, 
mask, + if in_collection is None: + in_collection = BoolArrayCollection() + cdef BoolArrayCollection _in_coll = in_collection + cdef np.int64_t nsub + out_collection = self.__refined_index_data_file(_in_coll, pos, hsml, mask, sub_mi1, sub_mi2, - file_id, nsub_mi, + file_id, &nsub, count_threshold, mask_threshold) + return nsub, out_collection @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) @cython.initializedcheck(False) - cdef np.int64_t __refined_index_data_file( + cdef BoolArrayCollection __refined_index_data_file( self, + BoolArrayCollection in_collection, np.ndarray[floating, ndim=2] pos, np.ndarray[floating, ndim=1] hsml, np.ndarray[np.uint8_t, ndim=1] mask, np.ndarray[np.uint64_t, ndim=1] sub_mi1, np.ndarray[np.uint64_t, ndim=1] sub_mi2, - np.uint64_t file_id, np.int64_t nsub_mi, + np.uint64_t file_id, np.int64_t *nsub_mi, np.uint64_t count_threshold, np.uint8_t mask_threshold - ) except -1: + ): # Initialize cdef np.int64_t i, p, sorted_ind cdef np.uint64_t mi1, mi2 cdef np.float64_t ppos[3] cdef np.float64_t s_ppos[3] # shifted ppos cdef int skip, Nex + cdef BoolArrayCollection this_collection, out_collection cdef np.uint64_t bounds[2][3] cdef np.uint8_t fully_enclosed cdef np.float64_t LE[3] @@ -790,12 +798,10 @@ cdef class ParticleBitmap: dds2, mi1_max, mi2_max, miex1, coarse_refined_map[miex1], ppos, mask[miex1], max_mi2_elements) - print("THIS MANY COARSE CELLS", coarse_refined_map.size()) - print("THIS MANY NSET", nset, nset / pos.shape[0], nsub_mi) - if n_calls > 0: - print("THIS MANY TERMINATIONS AND THIS MANY CALLS", nfully_enclosed, n_calls, nfully_enclosed / n_calls) cdef np.uint64_t count, vec_i cdef np.uint64_t total_count = 0 + this_collection = BoolArrayCollection() + print("Appending to the new BoolArrayCollection") for it1 in coarse_refined_map: mi1 = it1.first count = 0 @@ -803,164 +809,15 @@ cdef class ParticleBitmap: for vec_i in range(max_mi2_elements): if it1.second[vec_i] > 0: count += 1 - 
#sub_mi1[nsub_mi] = mi1 - #sub_mi2[nsub_mi] = vec_i - nsub_mi += 1 + nsub_mi[0] += 1 + this_collection._set(mi1, vec_i) free(coarse_refined_map[mi1]) - if count != refined_count[mi1]: - print("WHY IS THIS WRONG", count, refined_count[mi1]) - #print("IN ", mi1, "THIS MANY REFINED CELLS", count) total_count += count - if coarse_refined_map.size() > 0: - print("NSUB_MI NOW", total_count, total_count / (coarse_refined_map.size() * max_mi2_elements), nsub_mi, sub_mi1.shape[0], sub_mi2.shape[0]) - return nsub_mi - - if 0: - # Expand for smoothing - Nex = 1 - for i in range(3): - Nex_min[i] = 0 - Nex_max[i] = 0 - rpos_min = ppos[i] - (dds2[i]*mi_split2[i] + dds1[i]*mi_split1[i] + LE[i]) - rpos_max = dds2[i] - rpos_min - if rpos_min > hsml[p]: - Nex_min[i] = ((rpos_min-hsml[p])/dds2[i]) + 1 - if rpos_max > hsml[p]: - Nex_max[i] = ((rpos_max-hsml[p])/dds2[i]) + 1 - Nex *= (Nex_max[i] + Nex_min[i] + 1) - if Nex > 1: - # Ensure that min/max values for x,y,z indexes are obeyed - if (Nex_max[0] + Nex_min[0] + 1) > xex1_range.shape[0]: - xex1_range = np.empty(Nex_max[0] + Nex_min[0] + 1, 'uint64') - xex2_range = np.empty(Nex_max[0] + Nex_min[0] + 1, 'uint64') - if (Nex_max[1] + Nex_min[1] + 1) > yex1_range.shape[0]: - yex1_range = np.empty(Nex_max[1] + Nex_min[1] + 1, 'uint64') - yex2_range = np.empty(Nex_max[1] + Nex_min[1] + 1, 'uint64') - if (Nex_max[2] + Nex_min[2] + 1) > zex1_range.shape[0]: - zex1_range = np.empty(Nex_max[2] + Nex_min[2] + 1, 'uint64') - zex2_range = np.empty(Nex_max[2] + Nex_min[2] + 1, 'uint64') - xex2_min = mi_split2[0] - min(Nex_min[0], mi_split2[0]) - xex2_max = mi_split2[0] + min(Nex_max[0], (mi2_max - mi_split2[0])) + 1 - yex2_min = mi_split2[1] - min(Nex_min[1], mi_split2[1]) - yex2_max = mi_split2[1] + min(Nex_max[1], (mi2_max - mi_split2[1])) + 1 - zex2_min = mi_split2[2] - min(Nex_min[2], mi_split2[2]) - zex2_max = mi_split2[2] + min(Nex_max[2], (mi2_max - mi_split2[2])) + 1 - ixe = iye = ize = 0 - for xex2 in range(xex2_min, xex2_max): - 
xex1_range[ixe] = mi_split1[0] - xex2_range[ixe] = xex2 - ixe += 1 - for yex2 in range(yex2_min, yex2_max): - yex1_range[iye] = mi_split1[1] - yex2_range[iye] = yex2 - iye += 1 - for zex2 in range(zex2_min, zex2_max): - zex1_range[ize] = mi_split1[2] - zex2_range[ize] = zex2 - ize += 1 - # Expand to adjacent coarse cells, wrapping periodically - # if need be - # x - if Nex_min[0] > mi_split2[0]: - if mi_split1[0] > 0: - for xex2 in range(mi2_max + 1 - (Nex_min[0] - mi_split2[0]), mi2_max + 1): - xex1_range[ixe] = mi_split1[0] - 1 - xex2_range[ixe] = xex2 - ixe += 1 - elif PER[0]: - for xex2 in range(mi2_max + 1 - (Nex_min[0] - mi_split2[0]), mi2_max + 1): - xex1_range[ixe] = mi1_max - xex2_range[ixe] = xex2 - ixe += 1 - if Nex_max[0] > (mi2_max-mi_split2[0]): - if mi_split1[0] < mi1_max: - for xex2 in range(0, Nex_max[0] - (mi2_max-mi_split2[0])): - xex1_range[ixe] = mi_split1[0] + 1 - xex2_range[ixe] = xex2 - ixe += 1 - elif PER[0]: - for xex2 in range(0, Nex_max[0] - (mi2_max-mi_split2[0])): - xex1_range[ixe] = 0 - xex2_range[ixe] = xex2 - ixe += 1 - # y - if Nex_min[1] > mi_split2[1]: - if mi_split1[1] > 0: - for yex2 in range(mi2_max + 1 - (Nex_min[1] - mi_split2[1]), mi2_max + 1): - yex1_range[iye] = mi_split1[1] - 1 - yex2_range[iye] = yex2 - iye += 1 - elif PER[1]: - for yex2 in range(mi2_max + 1 - (Nex_min[1] - mi_split2[1]), mi2_max + 1): - yex1_range[iye] = mi1_max - yex2_range[iye] = yex2 - iye += 1 - if Nex_max[1] > (mi2_max-mi_split2[1]): - if mi_split1[1] < mi1_max: - for yex2 in range(0, Nex_max[1] - (mi2_max-mi_split2[1])): - yex1_range[iye] = mi_split1[1] + 1 - yex2_range[iye] = yex2 - iye += 1 - elif PER[1]: - for yex2 in range(0, Nex_max[1] - (mi2_max-mi_split2[1])): - yex1_range[iye] = 0 - yex2_range[iye] = yex2 - iye += 1 - # z - if Nex_min[2] > mi_split2[2]: - if mi_split1[2] > 0: - for zex2 in range(mi2_max + 1 - (Nex_min[2] - mi_split2[2]), mi2_max + 1): - zex1_range[ize] = mi_split1[2] - 1 - zex2_range[ize] = zex2 - ize += 1 - elif PER[2]: 
- for zex2 in range(mi2_max + 1 - (Nex_min[2] - mi_split2[2]), mi2_max + 1): - zex1_range[ize] = mi1_max - zex2_range[ize] = zex2 - ize += 1 - if Nex_max[2] > (mi2_max-mi_split2[2]): - if mi_split1[2] < mi1_max: - for zex2 in range(0, Nex_max[2] - (mi2_max-mi_split2[2])): - zex1_range[ize] = mi_split1[2] + 1 - zex2_range[ize] = zex2 - ize += 1 - elif PER[2]: - for zex2 in range(0, Nex_max[2] - (mi2_max-mi_split2[2])): - zex1_range[ize] = 0 - zex2_range[ize] = zex2 - ize += 1 - for ix in range(ixe): - xex1 = xex1_range[ix] - xex2 = xex2_range[ix] - for iy in range(iye): - yex1 = yex1_range[iy] - yex2 = yex2_range[iy] - for iz in range(ize): - zex1 = zex1_range[iz] - zex2 = zex2_range[iz] - if (xex1 == mi_split1[0] and xex2 == mi_split2[0] and - yex1 == mi_split1[1] and yex2 == mi_split2[1] and - zex1 == mi_split1[2] and zex2 == mi_split2[2]): - continue - miex1 = encode_morton_64bit(xex1, yex1, zex1) - miex2 = encode_morton_64bit(xex2, yex2, zex2) - if nsub_mi >= msize: - # Uncomment these lines to allow periodic - # caching of refined indices - # self.bitmasks._set_refined_index_array( - # file_id, nsub_mi, sub_mi1, sub_mi2) - # nsub_mi = 0 - raise IndexError( - "Refined index exceeded original " - "estimate.\n" - "nsub_mi = %s, " - "sub_mi1.shape[0] = %s" - % (nsub_mi, sub_mi1.shape[0])) - sub_mi1[nsub_mi] = miex1 - sub_mi2[nsub_mi] = miex2 - nsub_mi += 1 - # Only subs of particles in the mask - return nsub_mi + out_collection = BoolArrayCollection() + print("Logical or-ing") + in_collection._logicalor(this_collection, out_collection) + print("Completed") + return out_collection @cython.boundscheck(False) @cython.wraparound(False) diff --git a/yt/utilities/lib/ewah_bool_wrap.pyx b/yt/utilities/lib/ewah_bool_wrap.pyx index cf96862718f..9cfc5cfd2d6 100644 --- a/yt/utilities/lib/ewah_bool_wrap.pyx +++ b/yt/utilities/lib/ewah_bool_wrap.pyx @@ -321,6 +321,9 @@ cdef class FileBitmasks: out = ewah_refn[0].numberOfOnes() return out + def append(self, np.uint32_t ifile, 
BoolArrayCollection solf): + self._append(ifile, solf) + cdef void _append(self, np.uint32_t ifile, BoolArrayCollection solf): cdef ewah_bool_array *ewah_keys1 = ( self.ewah_keys)[ifile] cdef ewah_bool_array *ewah_refn1 = ( self.ewah_refn)[ifile] From 09aacb9afcccff3156c126cfbe624d87dd18432b Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Fri, 22 May 2020 11:04:16 -0500 Subject: [PATCH 24/42] Switch to using BoolArray --- yt/geometry/particle_oct_container.pyx | 43 +++++++++++++++----------- yt/utilities/lib/ewah_bool_array.pxd | 19 ++++++++++-- 2 files changed, 42 insertions(+), 20 deletions(-) diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index a760e6450c0..5e5b586d182 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -31,7 +31,7 @@ from yt.funcs import get_pbar from particle_deposit cimport gind from yt.utilities.lib.ewah_bool_array cimport \ - ewah_bool_array, ewah_bool_iterator, ewah_map + ewah_bool_array, ewah_bool_iterator, ewah_map, bool_array #from yt.utilities.lib.ewah_bool_wrap cimport \ from ..utilities.lib.ewah_bool_wrap cimport BoolArrayCollection from libcpp cimport bool @@ -56,7 +56,7 @@ from ..utilities.lib.ewah_bool_wrap cimport SparseUnorderedRefinedBitmaskSet as from ..utilities.lib.ewah_bool_wrap cimport BoolArrayCollectionUncompressed as BoolArrayColl from ..utilities.lib.ewah_bool_wrap cimport FileBitmasks -ctypedef map[np.uint64_t, np.uint8_t*] CoarseRefinedSets +ctypedef map[np.uint64_t, bool_array] CoarseRefinedSets cdef class ParticleOctreeContainer(OctreeContainer): cdef Oct** oct_list @@ -720,11 +720,9 @@ cdef class ParticleBitmap: mi2 = bounded_morton_split_relative_dds( ppos[0], ppos[1], ppos[2], LE, dds1, dds2, mi_split2) if refined_count[mi1] == 0: - coarse_refined_map[mi1] = malloc( - sizeof(np.uint8_t) * max_mi2_elements) - memset(coarse_refined_map[mi1], 0, max_mi2_elements) - if coarse_refined_map[mi1][mi2] == False: - 
coarse_refined_map[mi1][mi2] = True + coarse_refined_map[mi1].padWithZeroes(max_mi2_elements) + if coarse_refined_map[mi1].get(mi2) == False: + coarse_refined_map[mi1].set(mi2) refined_count[mi1] += 1 else: # only hit if we have smoothing lengths. # We have to do essentially the identical process to in the coarse indexing, @@ -782,14 +780,14 @@ cdef class ParticleBitmap: fully_enclosed = 0 # Now we need to fill our sub-range if refined_count[miex1] == 0: - coarse_refined_map[miex1] = malloc( - sizeof(np.uint8_t) * max_mi2_elements) - memset(coarse_refined_map[miex1], 0, max_mi2_elements) + coarse_refined_map[miex1].padWithZeroes(max_mi2_elements) elif refined_count[miex1] >= max_mi2_elements: continue if fully_enclosed == 1: nfully_enclosed += 1 - memset(coarse_refined_map[miex1], 0xFF, max_mi2_elements) + coarse_refined_map[miex1].inplace_logicalxor( + coarse_refined_map[miex1]) + coarse_refined_map[miex1].inplace_logicalnot() refined_count[miex1] = max_mi2_elements continue n_calls += 1 @@ -800,18 +798,27 @@ cdef class ParticleBitmap: max_mi2_elements) cdef np.uint64_t count, vec_i cdef np.uint64_t total_count = 0 + cdef bool_array *buf = NULL this_collection = BoolArrayCollection() - print("Appending to the new BoolArrayCollection") + cdef ewah_bool_array *refined_arr = NULL + print("Appending to the new BoolArrayCollection", coarse_refined_map.size()) + cdef np.uint64_t ncrm = 0 for it1 in coarse_refined_map: + if ncrm % 1000 == 0: + print(ncrm) + ncrm += 1 mi1 = it1.first + refined_arr = &this_collection.ewah_coll[0][mi1] + this_collection.ewah_keys[0].set(mi1) + this_collection.ewah_refn[0].set(mi1) count = 0 vec_i = 0 + buf = &it1.second for vec_i in range(max_mi2_elements): - if it1.second[vec_i] > 0: + if buf.get(vec_i) > 0: count += 1 + refined_arr.set(vec_i) nsub_mi[0] += 1 - this_collection._set(mi1, vec_i) - free(coarse_refined_map[mi1]) total_count += count out_collection = BoolArrayCollection() print("Logical or-ing") @@ -828,7 +835,7 @@ cdef class 
ParticleBitmap: np.float64_t dds1[3], np.uint64_t xex, np.uint64_t yex, np.uint64_t zex, np.float64_t dds2[3], np.uint64_t mi1_max, np.uint64_t mi2_max, np.uint64_t miex1, - np.uint8_t *refined_set, np.float64_t ppos[3], np.uint64_t mcount, + bool_array &refined_set, np.float64_t ppos[3], np.uint64_t mcount, np.uint64_t max_mi2_elements) except -1: cdef int i cdef np.uint64_t new_nsub = 0 @@ -859,14 +866,14 @@ cdef class ParticleBitmap: #miex2 = encode_morton_64bit(xex2, yex2, zex2) #decode_morton_64bit(miex2, ex2) # Let's check all our cases here - if refined_set[miex2] > 0: continue + if refined_set.get(miex2): continue if (miex2 & xex_max) < (miex2_min & xex_max): continue if (miex2 & yex_max) < (miex2_min & yex_max): continue if (miex2 & zex_max) < (miex2_min & zex_max): continue if (miex2 & xex_max) > (miex2_max & xex_max): continue if (miex2 & yex_max) > (miex2_max & yex_max): continue if (miex2 & zex_max) > (miex2_max & zex_max): continue - refined_set[miex2] = 1 + refined_set.set(miex2) new_nsub += 1 return new_nsub diff --git a/yt/utilities/lib/ewah_bool_array.pxd b/yt/utilities/lib/ewah_bool_array.pxd index 59d8db03328..797a681134b 100644 --- a/yt/utilities/lib/ewah_bool_array.pxd +++ b/yt/utilities/lib/ewah_bool_array.pxd @@ -11,6 +11,7 @@ cimport cython from libcpp.vector cimport vector from libcpp.map cimport map from libcpp.string cimport string +from libcpp cimport bool from libc.stdint cimport uint64_t # Streams req for c++ IO @@ -71,8 +72,22 @@ cdef extern from "ewah.h": EWAHBoolArraySetBitForwardIterator begin() EWAHBoolArraySetBitForwardIterator end() -ctypedef EWAHBoolArray[uint64_t] ewah_bool_array -ctypedef EWAHBoolArraySetBitForwardIterator[uint64_t] ewah_bool_iterator +cdef extern from "boolarray.h": + cppclass BoolArray[uword]: + void setSizeInBits(size_t sizeib) + void set(size_t pos) + void unset(size_t pos) + bool get(size_t pos) + void reset() + size_t sizeInBits() + size_t numberOfOnes() + void inplace_logicalxor(BoolArray &other) + 
void inplace_logicalnot() + size_t padWithZeroes(size_t totalbits) + +ctypedef EWAHBoolArray[np.uint64_t] ewah_bool_array +ctypedef EWAHBoolArraySetBitForwardIterator[np.uint64_t] ewah_bool_iterator ctypedef vector[size_t] bitset_array ctypedef map[np.uint64_t, ewah_bool_array] ewah_map ctypedef stringstream sstream +ctypedef BoolArray[np.uint64_t] bool_array From c82a015a838cb72fa32975296674273d9ff1730b Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Fri, 22 May 2020 15:58:36 -0500 Subject: [PATCH 25/42] Switch to word adding for refined EWAH. --- yt/geometry/particle_geometry_handler.py | 18 ++------ yt/geometry/particle_oct_container.pyx | 56 ++++++++++++------------ yt/utilities/lib/ewah_bool_array.pxd | 11 +++-- 3 files changed, 41 insertions(+), 44 deletions(-) diff --git a/yt/geometry/particle_geometry_handler.py b/yt/geometry/particle_geometry_handler.py index 61aaf6d9e4a..6950203dc86 100644 --- a/yt/geometry/particle_geometry_handler.py +++ b/yt/geometry/particle_geometry_handler.py @@ -170,40 +170,30 @@ def _initialize_refined_index(self): mi2_dds = mi1_dds / (1 << self.regions.index_order2) pb = get_pbar("Initializing refined index", len(self.data_files)) mask_threshold = getattr(self, '_index_mask_threshold', 2) - count_threshold = getattr(self, '_index_count_threshold', - (1 << (3*self.regions.index_order2))/128) - print("Count threshold ", count_threshold) + count_threshold = getattr(self, '_index_count_threshold', 256) + mylog.debug("Using estimated thresholds of %s and %s for refinement", mask_threshold, count_threshold) total_refined = 0 total_coarse_refined = ((mask >= 2) & (self.regions.particle_counts > count_threshold)).sum() - print("Total coarse refined zones: {} out of {} for {}%".format( - total_coarse_refined, mask.size, 100 * total_coarse_refined / mask.size)) + mylog.debug("This should produce roughly %s zones, for %s of the domain", + total_coarse_refined, 100 * total_coarse_refined / mask.size) for i, data_file in 
enumerate(self.data_files): coll = None pb.update(i) nsub_mi = 0 for ptype, pos in self.io._yield_coordinates(data_file): - print(i, ptype, pos.shape) if pos.size == 0: continue if hasattr(self.ds, '_sph_ptypes') and ptype == self.ds._sph_ptypes[0]: hsml = self.io._get_smoothing_length( data_file, pos.dtype, pos.shape) - print("Has smoothing length: max coverage of %0.3e %0.3e and min coverage of %0.3e %0.3e" % ( - hsml.max() / mi1_dds, hsml.max() / mi2_dds, - hsml.min() / mi1_dds, hsml.min() / mi2_dds)) else: hsml = None - #hsml = None nsub_mi, coll = self.regions._refined_index_data_file( coll, pos, hsml, mask, sub_mi1, sub_mi2, data_file.file_id, nsub_mi, count_threshold = count_threshold, mask_threshold = mask_threshold) total_refined += nsub_mi self.regions.bitmasks.append(data_file.file_id, coll) - #self.regions._set_refined_index_data_file( - # sub_mi1, sub_mi2, - # data_file.file_id, nsub_mi) pb.finish() - #print("TOTAL REFINED", total_refined) self.regions.find_collisions_refined() def _detect_output_fields(self): diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index 5e5b586d182..8641b8e677c 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -31,7 +31,7 @@ from yt.funcs import get_pbar from particle_deposit cimport gind from yt.utilities.lib.ewah_bool_array cimport \ - ewah_bool_array, ewah_bool_iterator, ewah_map, bool_array + ewah_bool_array, ewah_bool_iterator, ewah_map, bool_array, ewah_word_type #from yt.utilities.lib.ewah_bool_wrap cimport \ from ..utilities.lib.ewah_bool_wrap cimport BoolArrayCollection from libcpp cimport bool @@ -411,6 +411,7 @@ cdef class ParticleBitmap: cdef np.float64_t idds[3] cdef np.int32_t dims[3] cdef np.int64_t file_hash + cdef np.uint64_t directional_max2[3] cdef public np.uint64_t nfiles cdef public np.int32_t index_order1 cdef public np.int32_t index_order2 @@ -456,6 +457,10 @@ cdef class ParticleBitmap: # We use 64-bit masks 
self.index_order1 = index_order1 self.index_order2 = index_order2 + mi2_max = (1 << self.index_order2) - 1 + self.directional_max2[0] = encode_morton_64bit(mi2_max, 0, 0) + self.directional_max2[1] = encode_morton_64bit(0, mi2_max, 0) + self.directional_max2[2] = encode_morton_64bit(0, 0, mi2_max) # This will be an on/off flag for which morton index values are touched # by particles. # This is the simple way, for now. @@ -799,31 +804,20 @@ cdef class ParticleBitmap: cdef np.uint64_t count, vec_i cdef np.uint64_t total_count = 0 cdef bool_array *buf = NULL + cdef ewah_word_type w this_collection = BoolArrayCollection() cdef ewah_bool_array *refined_arr = NULL - print("Appending to the new BoolArrayCollection", coarse_refined_map.size()) - cdef np.uint64_t ncrm = 0 for it1 in coarse_refined_map: - if ncrm % 1000 == 0: - print(ncrm) - ncrm += 1 mi1 = it1.first refined_arr = &this_collection.ewah_coll[0][mi1] this_collection.ewah_keys[0].set(mi1) this_collection.ewah_refn[0].set(mi1) - count = 0 - vec_i = 0 buf = &it1.second - for vec_i in range(max_mi2_elements): - if buf.get(vec_i) > 0: - count += 1 - refined_arr.set(vec_i) - nsub_mi[0] += 1 - total_count += count + for vec_i in range(buf.sizeInBytes() / sizeof(ewah_word_type)): + w = buf.getWord(vec_i) + refined_arr.addWord(w) out_collection = BoolArrayCollection() - print("Logical or-ing") in_collection._logicalor(this_collection, out_collection) - print("Completed") return out_collection @cython.boundscheck(False) @@ -844,6 +838,8 @@ cdef class ParticleBitmap: cdef np.float64_t clip_pos_l[3], clip_pos_r[3], cell_edge_l, cell_edge_r cdef np.uint64_t ex1[3], ex2[3], ex3[3] cdef np.uint64_t xex_max, yex_max, zex_max + cdef np.uint64_t xiex_min, yiex_min, ziex_min + cdef np.uint64_t xiex_max, yiex_max, ziex_max ex1[0] = xex; ex1[1] = yex; ex1[2] = zex # Check a few special cases for i in range(3): @@ -859,23 +855,29 @@ cdef class ParticleBitmap: LE, dds1, dds2, bounds_l) miex2_max = 
bounded_morton_split_relative_dds(clip_pos_r[0], clip_pos_r[1], clip_pos_r[2], LE, dds1, dds2, bounds_r) - xex_max = encode_morton_64bit(mi2_max, 0, 0) - yex_max = encode_morton_64bit(0, mi2_max, 0) - zex_max = encode_morton_64bit(0, 0, mi2_max) + xex_max = self.directional_max2[0] + yex_max = self.directional_max2[1] + zex_max = self.directional_max2[2] + xiex_min = miex2_min & xex_max + yiex_min = miex2_min & yex_max + ziex_min = miex2_min & zex_max + xiex_max = miex2_max & xex_max + yiex_max = miex2_max & yex_max + ziex_max = miex2_max & zex_max + # This could *probably* be sped up by iterating over words. for miex2 in range(miex2_min, miex2_max + 1): #miex2 = encode_morton_64bit(xex2, yex2, zex2) #decode_morton_64bit(miex2, ex2) # Let's check all our cases here - if refined_set.get(miex2): continue - if (miex2 & xex_max) < (miex2_min & xex_max): continue - if (miex2 & yex_max) < (miex2_min & yex_max): continue - if (miex2 & zex_max) < (miex2_min & zex_max): continue - if (miex2 & xex_max) > (miex2_max & xex_max): continue - if (miex2 & yex_max) > (miex2_max & yex_max): continue - if (miex2 & zex_max) > (miex2_max & zex_max): continue + if (miex2 & xex_max) < (xiex_min): continue + if (miex2 & xex_max) > (xiex_max): continue + if (miex2 & yex_max) < (yiex_min): continue + if (miex2 & yex_max) > (yiex_max): continue + if (miex2 & zex_max) < (ziex_min): continue + if (miex2 & zex_max) > (ziex_max): continue refined_set.set(miex2) new_nsub += 1 - return new_nsub + return refined_set.numberOfOnes() @cython.boundscheck(False) @cython.wraparound(False) diff --git a/yt/utilities/lib/ewah_bool_array.pxd b/yt/utilities/lib/ewah_bool_array.pxd index 797a681134b..856b6e8d6d7 100644 --- a/yt/utilities/lib/ewah_bool_array.pxd +++ b/yt/utilities/lib/ewah_bool_array.pxd @@ -66,6 +66,7 @@ cdef extern from "ewah.h": void readBuffer(stringstream &incoming, const size_t buffersize) void write(stringstream &out, bint savesizeinbits) void writeBuffer(stringstream &out) + size_t 
addWord(uword newdata) vector[uword] &getBuffer() # const_iterator begin() # const_iterator end() @@ -80,14 +81,18 @@ cdef extern from "boolarray.h": bool get(size_t pos) void reset() size_t sizeInBits() + size_t sizeInBytes() size_t numberOfOnes() void inplace_logicalxor(BoolArray &other) void inplace_logicalnot() size_t padWithZeroes(size_t totalbits) + uword getWord(size_t pos) + size_t wordinbits -ctypedef EWAHBoolArray[np.uint64_t] ewah_bool_array -ctypedef EWAHBoolArraySetBitForwardIterator[np.uint64_t] ewah_bool_iterator +ctypedef np.uint64_t ewah_word_type +ctypedef EWAHBoolArray[ewah_word_type] ewah_bool_array +ctypedef EWAHBoolArraySetBitForwardIterator[ewah_word_type] ewah_bool_iterator ctypedef vector[size_t] bitset_array ctypedef map[np.uint64_t, ewah_bool_array] ewah_map ctypedef stringstream sstream -ctypedef BoolArray[np.uint64_t] bool_array +ctypedef BoolArray[ewah_word_type] bool_array From 5c30b9a6cf8152c5540825ab0b7c32fa57591c14 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Fri, 22 May 2020 16:15:42 -0500 Subject: [PATCH 26/42] Fixing a flake8 error --- yt/geometry/particle_geometry_handler.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/yt/geometry/particle_geometry_handler.py b/yt/geometry/particle_geometry_handler.py index 6950203dc86..8722ad58e25 100644 --- a/yt/geometry/particle_geometry_handler.py +++ b/yt/geometry/particle_geometry_handler.py @@ -166,8 +166,6 @@ def _initialize_refined_index(self): for d in self.data_files) * 28 sub_mi1 = np.zeros(max_npart, "uint64") sub_mi2 = np.zeros(max_npart, "uint64") - mi1_dds = self.ds.domain_width.max() / (1 << self.regions.index_order1) - mi2_dds = mi1_dds / (1 << self.regions.index_order2) pb = get_pbar("Initializing refined index", len(self.data_files)) mask_threshold = getattr(self, '_index_mask_threshold', 2) count_threshold = getattr(self, '_index_count_threshold', 256) From a66be30c7938ebd7ab4687fccf2c18c18db38ad2 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Fri, 22 May 2020 
16:48:49 -0500 Subject: [PATCH 27/42] remove unused unordered_set import --- yt/geometry/particle_oct_container.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index 8641b8e677c..96a4756151d 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -38,7 +38,6 @@ from libcpp cimport bool from libcpp.map cimport map from libcpp.vector cimport vector from libcpp.pair cimport pair -from libcpp.unordered_set cimport unordered_set as uset from cython.operator cimport dereference, preincrement import struct import os From 017768bcb79e0ae689934db38447b46e84b336be Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Fri, 22 May 2020 20:17:30 -0500 Subject: [PATCH 28/42] Fix testing calls; not working yet. --- yt/geometry/tests/test_particle_octree.py | 26 ++++++++++++----------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/yt/geometry/tests/test_particle_octree.py b/yt/geometry/tests/test_particle_octree.py index ab320d2725c..0e193c845b5 100644 --- a/yt/geometry/tests/test_particle_octree.py +++ b/yt/geometry/tests/test_particle_octree.py @@ -133,11 +133,12 @@ def FakeBitmap(npart, nfiles, order1, order2, posgen = yield_fake_decomp(decomp, npart, nfiles, left_edge, right_edge, buff=buff, distrib=distrib) + coll = None for i, (pos, hsml) in enumerate(posgen): - nsub_mi = reg._refined_index_data_file( - pos, hsml, mask, sub_mi1, sub_mi2, i, 0) - reg._set_refined_index_data_file( - sub_mi1, sub_mi2, i, nsub_mi) + nsub_mi, coll = reg._refined_index_data_file( + coll, pos, hsml, mask, sub_mi1, sub_mi2, i, + 0, count_threshold = 1, mask_threshold = 2) + reg.bitmasks.append(i, coll) # Save if file name provided if isinstance(fname, str): reg.save_bitmasks(fname) @@ -175,11 +176,12 @@ def test_bitmap_no_collisions(): sub_mi2 = np.zeros(max_npart, "uint64") posgen = yield_fake_decomp('sliced', npart, nfiles, left_edge, right_edge) + coll = 
None for i, (pos, hsml) in enumerate(posgen): - nsub_mi = reg._refined_index_data_file( - pos, hsml, mask, sub_mi1, sub_mi2, i, 0) - reg._set_refined_index_data_file( - sub_mi1, sub_mi2, i, nsub_mi) + nsub_mi, coll = reg._refined_index_data_file( + coll, pos, hsml, mask, sub_mi1, sub_mi2, i, + 0, count_threshold = 1, mask_threshold = 2) + reg.bitmasks.append(i, coll) assert_equal(reg.count_refined(i), 0) nr, nm = reg.find_collisions_refined() assert_equal(nr, 0, "%d collisions" % nr) @@ -214,10 +216,10 @@ def test_bitmap_collisions(): sub_mi1 = np.zeros(max_npart, "uint64") sub_mi2 = np.zeros(max_npart, "uint64") for i in range(nfiles): - nsub_mi = reg._refined_index_data_file( - pos, hsml, mask, sub_mi1, sub_mi2, i, 0) - reg._set_refined_index_data_file( - sub_mi1, sub_mi2, i, nsub_mi) + nsub_mi, coll = reg._refined_index_data_file( + None, pos, hsml, mask, sub_mi1, sub_mi2, i, + 0, count_threshold = 1, mask_threshold = 2) + reg.bitmasks.append(i, coll) assert_equal(reg.count_refined(i), ncoll) nr, nm = reg.find_collisions_refined() assert_equal(nr, 2**(3*(order1+order2)), "%d collisions" % nr) From d99d87f02a4f2fdf6b93c5dd95ff6c056f092410 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Mon, 25 May 2020 17:07:17 -0500 Subject: [PATCH 29/42] Missed a logic check --- yt/geometry/particle_oct_container.pyx | 3 +++ yt/utilities/lib/ewah_bool_wrap.pyx | 20 +++++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index 96a4756151d..8b1db6c3e05 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -682,7 +682,10 @@ cdef class ParticleBitmap: cdef np.uint64_t nset = 0, nfully_enclosed = 0, n_calls = 0 mi1_max = (1 << self.index_order1) - 1 mi2_max = (1 << self.index_order2) - 1 + cdef np.uint64_t max_mi1_elements = 1 << (3*self.index_order1) cdef np.uint64_t max_mi2_elements = 1 << (3*self.index_order2) + for i in 
range(max_mi1_elements): + refined_count[i] = 0 # Copy things from structure (type cast) for i in range(3): LE[i] = self.left_edge[i] diff --git a/yt/utilities/lib/ewah_bool_wrap.pyx b/yt/utilities/lib/ewah_bool_wrap.pyx index 9cfc5cfd2d6..742478ab210 100644 --- a/yt/utilities/lib/ewah_bool_wrap.pyx +++ b/yt/utilities/lib/ewah_bool_wrap.pyx @@ -870,15 +870,15 @@ cdef class BoolArrayCollection: return self._count_coarse() cdef void _logicalor(self, BoolArrayCollection solf, BoolArrayCollection out): - cdef ewah_bool_array *ewah_keys1 = self.ewah_keys - cdef ewah_bool_array *ewah_refn1 = self.ewah_refn - cdef ewahmap *ewah_coll1 = self.ewah_coll - cdef ewah_bool_array *ewah_keys2 = solf.ewah_keys - cdef ewah_bool_array *ewah_refn2 = solf.ewah_refn - cdef ewahmap *ewah_coll2 = solf.ewah_coll - cdef ewah_bool_array *ewah_keys3 = out.ewah_keys - cdef ewah_bool_array *ewah_refn3 = out.ewah_refn - cdef ewahmap *ewah_coll3 = out.ewah_coll + cdef ewah_bool_array *ewah_keys1 = self.ewah_keys + cdef ewah_bool_array *ewah_refn1 = self.ewah_refn + cdef ewahmap *ewah_coll1 = self.ewah_coll + cdef ewah_bool_array *ewah_keys2 = solf.ewah_keys + cdef ewah_bool_array *ewah_refn2 = solf.ewah_refn + cdef ewahmap *ewah_coll2 = solf.ewah_coll + cdef ewah_bool_array *ewah_keys3 = out.ewah_keys + cdef ewah_bool_array *ewah_refn3 = out.ewah_refn + cdef ewahmap *ewah_coll3 = out.ewah_coll cdef ewahmap_it it_map1, it_map2 cdef ewah_bool_array mi1_ewah1, mi1_ewah2 cdef np.uint64_t mi1 @@ -901,6 +901,8 @@ cdef class BoolArrayCollection: if it_map1 != ewah_coll1[0].end(): mi1_ewah1 = dereference(it_map1).second mi1_ewah1.logicalor(mi1_ewah2, ewah_coll3[0][mi1]) + else: + ewah_coll3[0][mi1] = mi1_ewah2 preincrement(it_map2) cdef void _append(self, BoolArrayCollection solf): From c5da9113d52437ece316d433d3dd466950898620 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Tue, 26 May 2020 10:40:12 -0500 Subject: [PATCH 30/42] Use bounded_morton_split_dds in coarse indexing --- 
yt/geometry/particle_oct_container.pyx | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index 8b1db6c3e05..2236996a463 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -496,8 +496,10 @@ cdef class ParticleBitmap: cdef np.uint64_t mi_split[3] cdef np.float64_t ppos[3] cdef np.float64_t s_ppos[3] # shifted ppos + cdef np.float64_t clip_pos_l[3] + cdef np.float64_t clip_pos_r[3] cdef int skip - cdef np.uint64_t bounds[3][2] + cdef np.uint64_t bounds[2][3] cdef np.uint64_t xex, yex, zex cdef np.float64_t LE[3] cdef np.float64_t RE[3] @@ -564,15 +566,17 @@ cdef class ParticleBitmap: s_ppos[2] = ppos[2] + axiterv[2][zi] # OK, now we compute the left and right edges for this shift. for i in range(3): - # Note that we cast here to int64_t because this could be negative - bounds[i][0] = i64max(((s_ppos[i] - LE[i] - radius)/dds[i]), 0) - bounds[i][1] = i64min(((s_ppos[i] - LE[i] + radius)/dds[i]), mi_max) + 1 + clip_pos_l[i] = fmax(s_ppos[i] - radius, LE[i] + dds[i]/10) + clip_pos_r[i] = fmin(s_ppos[i] + radius, RE[i] - dds[i]/10) + + bounded_morton_split_dds(clip_pos_l[0], clip_pos_l[1], clip_pos_l[2], LE, dds, bounds[0]) + bounded_morton_split_dds(clip_pos_r[0], clip_pos_r[1], clip_pos_r[2], LE, dds, bounds[1]) # We go to the upper bound plus one so that we have *inclusive* loops -- the upper bound # is the cell *index*, so we want to make sure we include that cell. This is also why # we don't need to worry about mi_max being the max index rather than the cell count. 
- for xex in range(bounds[0][0], bounds[0][1]): - for yex in range(bounds[1][0], bounds[1][1]): - for zex in range(bounds[2][0], bounds[2][1]): + for xex in range(bounds[0][0], bounds[1][0]): + for yex in range(bounds[0][1], bounds[1][1]): + for zex in range(bounds[0][2], bounds[1][2]): miex = encode_morton_64bit(xex, yex, zex) mask[miex] = 1 particle_counts[miex] += 1 From ac32bb0cb2dd3b486274ecd11ccfd3255759bfe5 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Tue, 26 May 2020 11:33:14 -0500 Subject: [PATCH 31/42] Fencepost error --- yt/geometry/particle_oct_container.pyx | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index 2236996a463..64e8855971d 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -568,15 +568,14 @@ cdef class ParticleBitmap: for i in range(3): clip_pos_l[i] = fmax(s_ppos[i] - radius, LE[i] + dds[i]/10) clip_pos_r[i] = fmin(s_ppos[i] + radius, RE[i] - dds[i]/10) - bounded_morton_split_dds(clip_pos_l[0], clip_pos_l[1], clip_pos_l[2], LE, dds, bounds[0]) bounded_morton_split_dds(clip_pos_r[0], clip_pos_r[1], clip_pos_r[2], LE, dds, bounds[1]) # We go to the upper bound plus one so that we have *inclusive* loops -- the upper bound # is the cell *index*, so we want to make sure we include that cell. This is also why # we don't need to worry about mi_max being the max index rather than the cell count. 
- for xex in range(bounds[0][0], bounds[1][0]): - for yex in range(bounds[0][1], bounds[1][1]): - for zex in range(bounds[0][2], bounds[1][2]): + for xex in range(bounds[0][0], bounds[1][0] + 1): + for yex in range(bounds[0][1], bounds[1][1] + 1): + for zex in range(bounds[0][2], bounds[1][2] + 1): miex = encode_morton_64bit(xex, yex, zex) mask[miex] = 1 particle_counts[miex] += 1 From e8ce92b2e4b252e12c8a616e79c3448139fe03db Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Tue, 26 May 2020 12:55:41 -0500 Subject: [PATCH 32/42] Check for None in append() --- yt/utilities/lib/ewah_bool_wrap.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/yt/utilities/lib/ewah_bool_wrap.pyx b/yt/utilities/lib/ewah_bool_wrap.pyx index 742478ab210..1c782f4b878 100644 --- a/yt/utilities/lib/ewah_bool_wrap.pyx +++ b/yt/utilities/lib/ewah_bool_wrap.pyx @@ -322,6 +322,7 @@ cdef class FileBitmasks: return out def append(self, np.uint32_t ifile, BoolArrayCollection solf): + if solf is None: return self._append(ifile, solf) cdef void _append(self, np.uint32_t ifile, BoolArrayCollection solf): From 588b50e527880707b4f4439ab0bce15dfa97c7ee Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Tue, 26 May 2020 13:36:21 -0500 Subject: [PATCH 33/42] Changing to uint32_t for Clang --- yt/utilities/lib/ewah_bool_array.pxd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt/utilities/lib/ewah_bool_array.pxd b/yt/utilities/lib/ewah_bool_array.pxd index 856b6e8d6d7..9b55e626d9a 100644 --- a/yt/utilities/lib/ewah_bool_array.pxd +++ b/yt/utilities/lib/ewah_bool_array.pxd @@ -89,7 +89,7 @@ cdef extern from "boolarray.h": uword getWord(size_t pos) size_t wordinbits -ctypedef np.uint64_t ewah_word_type +ctypedef np.uint32_t ewah_word_type ctypedef EWAHBoolArray[ewah_word_type] ewah_bool_array ctypedef EWAHBoolArraySetBitForwardIterator[ewah_word_type] ewah_bool_iterator ctypedef vector[size_t] bitset_array From 6488e10072d54192a6860d79e6fddb941fd65876 Mon Sep 17 00:00:00 2001 From: 
Matthew Turk Date: Tue, 26 May 2020 15:07:29 -0500 Subject: [PATCH 34/42] Try to be more careful with uint/int distinctions. --- yt/geometry/particle_oct_container.pyx | 44 ++++++++++++++------------ yt/utilities/lib/ewah_bool_wrap.pxd | 16 +++++----- yt/utilities/lib/ewah_bool_wrap.pyx | 44 ++++++++++++-------------- 3 files changed, 52 insertions(+), 52 deletions(-) diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index 64e8855971d..cfef5378d95 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -509,7 +509,7 @@ cdef class ParticleBitmap: cdef np.float64_t radius cdef np.uint8_t[:] mask = self.masks[:, file_id] cdef np.uint64_t[:] particle_counts = self.particle_counts - cdef np.int64_t msize = (1 << (self.index_order1 * 3)) + cdef np.uint64_t msize = (1 << (self.index_order1 * 3)) cdef int axiter[3][2] cdef np.float64_t axiterv[3][2] mi_max = (1 << self.index_order1) - 1 @@ -643,7 +643,8 @@ cdef class ParticleBitmap: np.uint64_t count_threshold, np.uint8_t mask_threshold ): # Initialize - cdef np.int64_t i, p, sorted_ind + cdef np.int64_t p, sorted_ind + cdef np.uint64_t i cdef np.uint64_t mi1, mi2 cdef np.float64_t ppos[3] cdef np.float64_t s_ppos[3] # shifted ppos @@ -961,7 +962,7 @@ cdef class ParticleBitmap: def calcsize_bitmasks(self): # TODO: All cython cdef bytes serial_BAC - cdef int ifile + cdef np.uint64_t ifile cdef int out = 0 out += struct.calcsize('Q') # Bitmaps for each file @@ -983,7 +984,7 @@ cdef class ParticleBitmap: def save_bitmasks(self, fname): cdef bytes serial_BAC - cdef int ifile + cdef np.uint64_t ifile f = open(fname,'wb') # Header f.write(struct.pack('Q', _bitmask_version)) @@ -1057,7 +1058,7 @@ cdef class ParticleBitmap: return read_flag def print_info(self): - cdef int ifile + cdef np.uint64_t ifile for ifile in range(self.nfiles): self.bitmasks.print_info(ifile, "File: %03d" % ifile) @@ -1080,7 +1081,8 @@ cdef class ParticleBitmap: cdef 
vector[size_t] vec_totref cdef vector[size_t].iterator it_mi1 cdef int nm = 0, nc = 0 - cdef int ifile + cdef np.uint64_t ifile, nbitmasks + nbitmasks = len(self.bitmasks) # Locate all indices with second level refinement for ifile in range(self.nfiles): arr = ( self.bitmasks.ewah_refn)[ifile][0] @@ -1092,7 +1094,7 @@ cdef class ParticleBitmap: mi1 = dereference(it_mi1) arr_any.reset() arr_two.reset() - for ifile in range(len(self.bitmasks)): + for ifile in range(nbitmasks): if self.bitmasks._isref(ifile, mi1) == 1: arr = ( self.bitmasks.ewah_coll)[ifile][0][mi1] arr_any.logicaland(arr, arr_two) # Indices in previous files @@ -1249,7 +1251,7 @@ cdef class ParticleBitmap: def mask_to_files(self, BoolArrayCollection mm_s): cdef FileBitmasks mm_d = self.bitmasks - cdef np.int32_t ifile + cdef np.uint32_t ifile cdef np.ndarray[np.uint8_t, ndim=1] file_mask_p file_mask_p = np.zeros(self.nfiles, dtype="uint8") # Compare with mask of particles @@ -1264,7 +1266,7 @@ cdef class ParticleBitmap: def masks_to_files(self, BoolArrayCollection mm_s, BoolArrayCollection mm_g): cdef FileBitmasks mm_d = self.bitmasks - cdef np.int32_t ifile + cdef np.uint32_t ifile cdef np.ndarray[np.uint8_t, ndim=1] file_mask_p cdef np.ndarray[np.uint8_t, ndim=1] file_mask_g file_mask_p = np.zeros(self.nfiles, dtype="uint8") @@ -1327,13 +1329,15 @@ cdef class ParticleBitmap: cdef ewah_bool_array *ewah_base if base_mask is not None: ewah_base = base_mask.ewah_keys + else: + ewah_base = NULL cdef ewah_bool_iterator *iter_set = new ewah_bool_iterator(ewah_slct[0].begin()) cdef ewah_bool_iterator *iter_end = new ewah_bool_iterator(ewah_slct[0].end()) cdef np.ndarray[np.uint8_t, ndim=1] slct_arr slct_arr = np.zeros((1 << (self.index_order1 * 3)),'uint8') while iter_set[0] != iter_end[0]: mi = dereference(iter_set[0]) - if base_mask is not None and ewah_base[0].get(mi) == 0: + if ewah_base != NULL and ewah_base[0].get(mi) == 0: octree._index_base_roots[croot] = 0 slct_arr[mi] = 2 else: @@ -1345,7 +1349,7 
@@ cdef class ParticleBitmap: croot += 1 preincrement(iter_set[0]) assert(croot == nroot) - if base_mask is not None: + if ewah_base != NULL: assert(np.sum(octree._index_base_roots) == ewah_base[0].numberOfOnes()) # Get morton indices for all particles in this file and those # contaminating cells it has majority control of. @@ -1496,7 +1500,7 @@ cdef class ParticleBitmapSelector: rpos[i] = self.DRE[i] - self.bitmap.dds_mi2[i]/2.0 sbbox = self.selector.select_bbox_edge(pos, rpos) if sbbox == 1: - for mi1 in range(self.s1): + for mi1 in range(self.s1): mm_s0._set_coarse(mi1) mm_s0._compress(mm_s) return @@ -1513,7 +1517,7 @@ cdef class ParticleBitmapSelector: def find_files(self, np.ndarray[np.uint8_t, ndim=1] file_mask_p, np.ndarray[np.uint8_t, ndim=1] file_mask_g): - cdef int i + cdef np.uint64_t i cdef np.int32_t level = 0 cdef np.uint64_t mi1 mi1 = ~(0) @@ -1547,7 +1551,7 @@ cdef class ParticleBitmapSelector: @cython.wraparound(False) @cython.cdivision(True) cdef bint is_refined_files(self, np.uint64_t mi1): - cdef int i + cdef np.uint64_t i if self.bitmap.collisions._isref(mi1): # Don't refine if files all selected already for i in range(self.nfiles): @@ -1574,7 +1578,7 @@ cdef class ParticleBitmapSelector: @cython.cdivision(True) @cython.initializedcheck(False) cdef void set_files_coarse(self, np.uint64_t mi1): - cdef int i + cdef np.uint64_t i cdef bint flag_ref = self.is_refined(mi1) # Flag files at coarse level if flag_ref == 0: @@ -1601,7 +1605,7 @@ cdef class ParticleBitmapSelector: @cython.cdivision(True) @cython.initializedcheck(False) cdef void set_files_refined(self, np.uint64_t mi1, np.uint64_t mi2): - cdef int i + cdef np.uint64_t i # Flag files for i in range(self.nfiles): if self.file_mask_p[i] == 0: @@ -1616,14 +1620,14 @@ cdef class ParticleBitmapSelector: @cython.cdivision(True) @cython.initializedcheck(False) cdef void add_neighbors_coarse(self, np.uint64_t mi1): - cdef int m + cdef np.uint64_t m cdef np.uint32_t ntot cdef np.uint64_t mi1_n 
ntot = morton_neighbors_coarse(mi1, self.max_index1, self.periodicity, self.ngz, self.neighbors, self.ind1_n, self.neighbor_list1) - for m in range(ntot): + for m in range(ntot): mi1_n = self.neighbor_list1[m] self.coarse_ghosts_bool[mi1_n] = 1 @@ -1632,14 +1636,14 @@ cdef class ParticleBitmapSelector: @cython.cdivision(True) @cython.initializedcheck(False) cdef void set_files_neighbors_coarse(self, np.uint64_t mi1): - cdef int i, m + cdef np.uint64_t i, m cdef np.uint32_t ntot cdef np.uint64_t mi1_n ntot = morton_neighbors_coarse(mi1, self.max_index1, self.periodicity, self.ngz, self.neighbors, self.ind1_n, self.neighbor_list1) - for m in range(ntot): + for m in range(ntot): mi1_n = self.neighbor_list1[m] for i in range(self.nfiles): if self.file_mask_g[i] == 0: diff --git a/yt/utilities/lib/ewah_bool_wrap.pxd b/yt/utilities/lib/ewah_bool_wrap.pxd index 589d56a028c..4feeaf31e4f 100644 --- a/yt/utilities/lib/ewah_bool_wrap.pxd +++ b/yt/utilities/lib/ewah_bool_wrap.pxd @@ -36,9 +36,9 @@ cdef class FileBitmasks: cdef bint _get_coarse(self, np.uint32_t ifile, np.uint64_t i1) cdef void _get_coarse_array(self, np.uint32_t ifile, np.uint64_t imax, np.uint8_t[:] arr) except * cdef bint _isref(self, np.uint32_t ifile, np.uint64_t i) - cdef int _count_total(self, np.uint32_t ifile) - cdef int _count_refined(self, np.uint32_t ifile) - cdef int _count_coarse(self, np.uint32_t ifile) + cdef np.uint64_t _count_total(self, np.uint32_t ifile) + cdef np.uint64_t _count_refined(self, np.uint32_t ifile) + cdef np.uint64_t _count_coarse(self, np.uint32_t ifile) cdef void _append(self, np.uint32_t ifile, BoolArrayCollection solf) cdef bint _intersects(self, np.uint32_t ifile, BoolArrayCollection solf) cdef void _logicalxor(self, np.uint32_t ifile, BoolArrayCollection solf, BoolArrayCollection out) @@ -72,9 +72,9 @@ cdef class BoolArrayCollection: cdef bint _contains(self, np.uint64_t i) cdef bint _isref(self, np.uint64_t i) cdef void _ewah_coarse(self) - cdef int _count_total(self) - 
cdef int _count_refined(self) - cdef int _count_coarse(self) + cdef np.uint64_t _count_total(self) + cdef np.uint64_t _count_refined(self) + cdef np.uint64_t _count_coarse(self) cdef void _append(self, BoolArrayCollection solf) cdef void _logicalor(self, BoolArrayCollection solf, BoolArrayCollection out) cdef bint _intersects(self, BoolArrayCollection solf) @@ -110,8 +110,8 @@ cdef class BoolArrayCollectionUncompressed: cdef bint _get(self, np.uint64_t i1, np.uint64_t i2=*) cdef bint _get_coarse(self, np.uint64_t i1) cdef bint _isref(self, np.uint64_t i) - cdef int _count_total(self) - cdef int _count_refined(self) + cdef np.uint64_t _count_total(self) + cdef np.uint64_t _count_refined(self) cdef void _append(self, BoolArrayCollectionUncompressed solf) cdef bint _intersects(self, BoolArrayCollectionUncompressed solf) cdef void _compress(self, BoolArrayCollection solf) diff --git a/yt/utilities/lib/ewah_bool_wrap.pyx b/yt/utilities/lib/ewah_bool_wrap.pyx index 1c782f4b878..05cbe86ce4b 100644 --- a/yt/utilities/lib/ewah_bool_wrap.pyx +++ b/yt/utilities/lib/ewah_bool_wrap.pyx @@ -306,19 +306,17 @@ cdef class FileBitmasks: def count_refined(self, ifile): return self._count_refined(ifile) - cdef int _count_coarse(self, np.uint32_t ifile): + cdef np.uint64_t _count_coarse(self, np.uint32_t ifile): return self._count_total(ifile) - self._count_refined(ifile) - cdef int _count_total(self, np.uint32_t ifile): + cdef np.uint64_t _count_total(self, np.uint32_t ifile): cdef ewah_bool_array *ewah_keys = ( self.ewah_keys)[ifile] - cdef int out - out = ewah_keys[0].numberOfOnes() + cdef np.uint64_t out = ewah_keys[0].numberOfOnes() return out - cdef int _count_refined(self, np.uint32_t ifile): + cdef np.uint64_t _count_refined(self, np.uint32_t ifile): cdef ewah_bool_array *ewah_refn = ( self.ewah_refn)[ifile] - cdef int out - out = ewah_refn[0].numberOfOnes() + cdef np.uint64_t out = ewah_refn[0].numberOfOnes() return out def append(self, np.uint32_t ifile, BoolArrayCollection 
solf): @@ -842,29 +840,26 @@ cdef class BoolArrayCollection: def ewah_coarse(self): return self._ewah_coarse() - cdef int _count_total(self): + cdef np.uint64_t _count_total(self): cdef ewah_bool_array *ewah_keys = self.ewah_keys - cdef int out - out = ewah_keys.numberOfOnes() + cdef np.uint64_t out = ewah_keys.numberOfOnes() return out def count_total(self): return self._count_total() - cdef int _count_refined(self): + cdef np.uint64_t _count_refined(self): cdef ewah_bool_array *ewah_refn = self.ewah_refn - cdef int out - out = ewah_refn.numberOfOnes() + cdef np.uint64_t out = ewah_refn.numberOfOnes() return out def count_refined(self): return self._count_refined() - cdef int _count_coarse(self): + cdef np.uint64_t _count_coarse(self): self._ewah_coarse() cdef ewah_bool_array *ewah_coar = self.ewah_coar - cdef int out - out = ewah_coar.numberOfOnes() + cdef np.uint64_t out = ewah_coar.numberOfOnes() return out def count_coarse(self): @@ -1423,18 +1418,18 @@ cdef class BoolArrayCollectionUncompressed: cdef bitarrtype *ewah_refn = self.ewah_refn return ewah_refn[i] - cdef int _count_total(self): + cdef np.uint64_t _count_total(self): cdef bitarrtype *ewah_keys = self.ewah_keys cdef np.uint64_t i - cdef int out = 0 + cdef np.uint64_t out = 0 for i in range(self.nele1): out += ewah_keys[i] return out - cdef int _count_refined(self): + cdef np.uint64_t _count_refined(self): cdef bitarrtype *ewah_refn = self.ewah_refn cdef np.uint64_t i - cdef int out = 0 + cdef np.uint64_t out = 0 for i in range(self.nele1): out += ewah_refn[i] return out @@ -1488,6 +1483,7 @@ cdef class BoolArrayCollectionUncompressed: break if (mi1 < self.nele1): return 0 + mi1 = self.nele1 # This is to get rid of a warning # Intersection at refined level for mi1 in range(self.nele1): if (ewah_refn1[mi1] == 1) and (ewah_refn2[mi1] == 1): @@ -1516,8 +1512,8 @@ cdef class BoolArrayCollectionUncompressed: del ewah_coll def print_info(self, prefix=''): - cdef int nrefn = self._count_refined() - cdef int 
nkeys = self._count_total() + cdef np.uint64_t nrefn = self._count_refined() + cdef np.uint64_t nkeys = self._count_total() print("{}{: 8d} coarse, {: 8d} refined, {: 8d} total".format(prefix, nkeys - nrefn, nrefn, @@ -1657,7 +1653,7 @@ cdef class SparseUnorderedRefinedBitmaskVector: self.total = 0 cdef to_array(self): - cdef int i + cdef np.uint64_t i cdef np.ndarray[np.uint64_t, ndim=2] rv self._remove_duplicates() rv = np.empty((self.entries.size(),2),dtype='uint64') @@ -1730,7 +1726,7 @@ cdef class SparseUnorderedRefinedBitmaskSet: self.entries.clear() cdef to_array(self): - cdef int i + cdef np.uint64_t i cdef np.ndarray[np.uint64_t, ndim=2] rv rv = np.empty((self.entries.size(),2),dtype='uint64') i = 0 From 26a4ed4df1e062d133aaebb9c2e52e6bd05d1182 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Tue, 26 May 2020 15:35:38 -0500 Subject: [PATCH 35/42] Explicitly cast to uword --- yt/utilities/lib/ewahboolarray/boolarray.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt/utilities/lib/ewahboolarray/boolarray.h b/yt/utilities/lib/ewahboolarray/boolarray.h index 44fdbd6b8a9..fa7da1c1ecf 100644 --- a/yt/utilities/lib/ewahboolarray/boolarray.h +++ b/yt/utilities/lib/ewahboolarray/boolarray.h @@ -322,7 +322,7 @@ class BoolArray { size_t numberOfOnes() const { size_t count = 0; for (size_t i = 0; i < buffer.size(); ++i) { - count += countOnes(buffer[i]); + count += countOnes((uword) buffer[i]); } return count; } From e29599d6e09c318539dd39f8521a360e138fdd22 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Wed, 27 May 2020 10:36:15 -0500 Subject: [PATCH 36/42] Update EWAH to 88b25a3345b82353ccd97a7de6064e6c179a7cc2 --- yt/utilities/lib/ewahboolarray/README | 2 +- yt/utilities/lib/ewahboolarray/boolarray.h | 823 ++-- yt/utilities/lib/ewahboolarray/ewah.h | 3832 +++++++++-------- yt/utilities/lib/ewahboolarray/ewahutil.h | 262 +- .../lib/ewahboolarray/runninglengthword.h | 950 ++-- 5 files changed, 3186 insertions(+), 2683 deletions(-) diff --git 
a/yt/utilities/lib/ewahboolarray/README b/yt/utilities/lib/ewahboolarray/README index 7f8803852bc..b86d316c9ad 100644 --- a/yt/utilities/lib/ewahboolarray/README +++ b/yt/utilities/lib/ewahboolarray/README @@ -3,6 +3,6 @@ available at: https://github.com/lemire/EWAHBoolArray -Currently this is at revision 80881379f8a582f45dda1be9edfc84d244846427. +Currently this is at revision 88b25a3345b82353ccd97a7de6064e6c179a7cc2 This code is available under the Apache2.0 license. diff --git a/yt/utilities/lib/ewahboolarray/boolarray.h b/yt/utilities/lib/ewahboolarray/boolarray.h index fa7da1c1ecf..4a607adf7d4 100644 --- a/yt/utilities/lib/ewahboolarray/boolarray.h +++ b/yt/utilities/lib/ewahboolarray/boolarray.h @@ -15,423 +15,474 @@ #include #include -using namespace std; - - // uncomment this for debugging //#define EWAHASSERT /** * A dynamic bitset implementation. (without compression). */ -template -class BoolArray { +template class BoolArray { public: - BoolArray(const size_t n, const uword initval = 0) : - buffer(n / wordinbits + (n % wordinbits == 0 ? 0 : 1), initval), - sizeinbits(n) { - } - - BoolArray() : - buffer(), sizeinbits(0) { - } - - BoolArray(const BoolArray & ba) : - buffer(ba.buffer), sizeinbits(ba.sizeinbits) { - } - static BoolArray bitmapOf(size_t n, ...) { - BoolArray ans; - va_list vl; - va_start(vl, n); - for (size_t i = 0; i < n; i++) { - ans.set(static_cast(va_arg(vl, int))); - } - va_end(vl); - return ans; - } - size_t sizeInBytes() const { - return buffer.size() * sizeof(uword); - } - - void read(istream & in) { - sizeinbits = 0; - in.read(reinterpret_cast (&sizeinbits), sizeof(sizeinbits)); - buffer.resize( - sizeinbits / wordinbits - + (sizeinbits % wordinbits == 0 ? 
0 : 1)); - if(buffer.size() == 0) return; - in.read(reinterpret_cast (&buffer[0]), - static_cast(buffer.size() * sizeof(uword))); - } - - void readBuffer(istream & in, const size_t size) { - buffer.resize(size); - sizeinbits = size * sizeof(uword) * 8; - if(buffer.empty()) return; - in.read(reinterpret_cast (&buffer[0]), - buffer.size() * sizeof(uword)); - } - - void setSizeInBits(const size_t sizeib) { - sizeinbits = sizeib; - } - - void write(ostream & out) { - write(out, sizeinbits); - } - - void write(ostream & out, const size_t numberofbits) const { - const size_t size = numberofbits / wordinbits + (numberofbits - % wordinbits == 0 ? 0 : 1); - out.write(reinterpret_cast (&numberofbits), - sizeof(numberofbits)); - if(numberofbits == 0) return; - out.write(reinterpret_cast (&buffer[0]), - static_cast(size * sizeof(uword))); - } - - void writeBuffer(ostream & out, const size_t numberofbits) const { - const size_t size = numberofbits / wordinbits + (numberofbits - % wordinbits == 0 ? 0 : 1); - if(size == 0) return; + BoolArray(const size_t n, const uword initval = 0) + : buffer(n / wordinbits + (n % wordinbits == 0 ? 0 : 1), initval), + sizeinbits(n) {} + + BoolArray() : buffer(), sizeinbits(0) {} + + BoolArray(const BoolArray &ba) + : buffer(ba.buffer), sizeinbits(ba.sizeinbits) {} + static BoolArray bitmapOf(size_t n, ...) { + BoolArray ans; + va_list vl; + va_start(vl, n); + for (size_t i = 0; i < n; i++) { + ans.set(static_cast(va_arg(vl, int))); + } + va_end(vl); + return ans; + } + size_t sizeInBytes() const { return buffer.size() * sizeof(uword); } + + void read(std::istream &in) { + sizeinbits = 0; + in.read(reinterpret_cast(&sizeinbits), sizeof(sizeinbits)); + buffer.resize(sizeinbits / wordinbits + + (sizeinbits % wordinbits == 0 ? 
0 : 1)); + if (buffer.size() == 0) + return; + in.read(reinterpret_cast(&buffer[0]), + static_cast(buffer.size() * sizeof(uword))); + } + + void readBuffer(std::istream &in, const size_t size) { + buffer.resize(size); + sizeinbits = size * sizeof(uword) * 8; + if (buffer.empty()) + return; + in.read(reinterpret_cast(&buffer[0]), + buffer.size() * sizeof(uword)); + } + + void setSizeInBits(const size_t sizeib) { sizeinbits = sizeib; } + + void write(std::ostream &out) { write(out, sizeinbits); } + + void write(std::ostream &out, const size_t numberofbits) const { + const size_t size = + numberofbits / wordinbits + (numberofbits % wordinbits == 0 ? 0 : 1); + out.write(reinterpret_cast(&numberofbits), + sizeof(numberofbits)); + if (numberofbits == 0) + return; + out.write(reinterpret_cast(&buffer[0]), + static_cast(size * sizeof(uword))); + } + + void writeBuffer(std::ostream &out, const size_t numberofbits) const { + const size_t size = + numberofbits / wordinbits + (numberofbits % wordinbits == 0 ? 0 : 1); + if (size == 0) + return; #ifdef EWAHASSERT - assert(buffer.size() >= size); + assert(buffer.size() >= size); #endif - out.write(reinterpret_cast (&buffer[0]), - size * sizeof(uword)); - } - - size_t sizeOnDisk() const { - size_t size = sizeinbits / wordinbits - + (sizeinbits % wordinbits == 0 ? 
0 : 1); - return sizeof(sizeinbits) + size * sizeof(uword); - } - - BoolArray& operator=(const BoolArray & x) { - this->buffer = x.buffer; - this->sizeinbits = x.sizeinbits; - return *this; - } - - bool operator==(const BoolArray & x) const { - if (sizeinbits != x.sizeinbits) - return false; - for (size_t k = 0; k < buffer.size(); ++k) - if (buffer[k] != x.buffer[k]) - return false; - return true; - } - - bool operator!=(const BoolArray & x) const { - return !operator==(x); - } - - void setWord(const size_t pos, const uword val) { + out.write(reinterpret_cast(&buffer[0]), size * sizeof(uword)); + } + + size_t sizeOnDisk() const { + size_t size = + sizeinbits / wordinbits + (sizeinbits % wordinbits == 0 ? 0 : 1); + return sizeof(sizeinbits) + size * sizeof(uword); + } + + BoolArray &operator=(const BoolArray &x) { + this->buffer = x.buffer; + this->sizeinbits = x.sizeinbits; + return *this; + } + + bool operator==(const BoolArray &x) const { + if (sizeinbits != x.sizeinbits) + return false; + for (size_t k = 0; k < buffer.size(); ++k) + if (buffer[k] != x.buffer[k]) + return false; + return true; + } + + bool operator!=(const BoolArray &x) const { return !operator==(x); } + + void setWord(const size_t pos, const uword val) { #ifdef EWAHASSERT - assert(pos < buffer.size()); + assert(pos < buffer.size()); #endif - buffer[pos] = val; - } + buffer[pos] = val; + } - void addWord(const uword val) { - if (sizeinbits % wordinbits != 0) - throw invalid_argument("you probably didn't want to do this"); - sizeinbits += wordinbits; - buffer.push_back(val); - } + void addWord(const uword val) { + if (sizeinbits % wordinbits != 0) + throw std::invalid_argument("you probably didn't want to do this"); + sizeinbits += wordinbits; + buffer.push_back(val); + } - uword getWord(const size_t pos) const { + uword getWord(const size_t pos) const { #ifdef EWAHASSERT - assert(pos < buffer.size()); + assert(pos < buffer.size()); #endif - return buffer[pos]; - } + return buffer[pos]; + } - /** 
- * set to true (whether it was already set to true or not) - * - * This is an expensive (random access) API, you really ought to - * prepare a new word and then append it. - */ - void set(const size_t pos) { - if(pos >= sizeinbits) padWithZeroes(pos+1); - buffer[pos / wordinbits] |= (static_cast (1) << (pos - % wordinbits)); - } - - /** - * set to false (whether it was already set to false or not) - * - * This is an expensive (random access) API, you really ought to - * prepare a new word and then append it. - */ - void unset(const size_t pos) { - if(pos < sizeinbits) - buffer[pos / wordinbits] |= ~(static_cast (1) << (pos - % wordinbits)); - } - - /** - * true of false? (set or unset) - */ - bool get(const size_t pos) const { + /** + * set to true (whether it was already set to true or not) + */ + void set(const size_t pos) { + if (pos >= sizeinbits) + padWithZeroes(pos + 1); + buffer[pos / wordinbits] |= (static_cast(1) << (pos % wordinbits)); + } + + /** + * set to false (whether it was already set to false or not) + * + */ + void unset(const size_t pos) { + if (pos < sizeinbits) + buffer[pos / wordinbits] &= + ~(static_cast(1) << (pos % wordinbits)); + } + + /** + * true of false? (set or unset) + */ + bool get(const size_t pos) const { #ifdef EWAHASSERT - assert(pos / wordinbits < buffer.size()); + assert(pos / wordinbits < buffer.size()); #endif - return (buffer[pos / wordinbits] & (static_cast (1) << (pos - % wordinbits))) != 0; - } - - - - /** - * set all bits to 0 - */ - void reset() { - if(buffer.size() > 0) memset(&buffer[0], 0, sizeof(uword) * buffer.size()); - sizeinbits = 0; - } - - size_t sizeInBits() const { - return sizeinbits; - } - - ~BoolArray() { - } - - /** - * Computes the logical and and writes to the provided BoolArray (out). - * The current bitmaps is unchanged. 
- */ - void logicaland(const BoolArray & ba, BoolArray & out) { - if(ba.buffer.size() < buffer.size()) - out.setToSize(ba); - else - out.setToSize(*this); - for (size_t i = 0; i < out.buffer.size(); ++i) - out.buffer[i] = buffer[i] & ba.buffer[i]; - } - - void inplace_logicaland(const BoolArray & ba) { - if(ba.buffer.size() < buffer.size()) - setToSize(ba); - for (size_t i = 0; i < buffer.size(); ++i) - buffer[i] = buffer[i] & ba.buffer[i]; - } - - /** - * Computes the logical andnot and writes to the provided BoolArray (out). - * The current bitmaps is unchanged. - */ - void logicalandnot(const BoolArray & ba, BoolArray & out) { - out.setToSize(*this); - size_t upto = out.buffer.size() < ba.buffer.size() ? out.buffer.size() : ba.buffer.size(); - for (size_t i = 0; i < upto; ++i) - out.buffer[i] = buffer[i] & (~ba.buffer[i]); - for (size_t i = upto; i < out.buffer.size(); ++i) - out.buffer[i] = buffer[i]; - out.clearBogusBits(); - } - - void inplace_logicalandnot(const BoolArray & ba) { - size_t upto = buffer.size() < ba.buffer.size() ? buffer.size() : ba.buffer.size(); - for (size_t i = 0; i < upto; ++i) - buffer[i] = buffer[i] & (~ba.buffer[i]); - clearBogusBits(); - } - - /** - * Computes the logical or and writes to the provided BoolArray (out). - * The current bitmaps is unchanged. - */ - void logicalor(const BoolArray & ba, BoolArray & out) { - const BoolArray * smallest; - const BoolArray * largest; - if(ba.buffer.size() > buffer.size()) { - smallest = this; - largest = &ba; - out.setToSize(ba); - } else { - smallest = &ba; - largest = this; - out.setToSize(*this); - } - for (size_t i = 0; i < smallest->buffer.size(); ++i) - out.buffer[i] = buffer[i] | ba.buffer[i]; - for (size_t i = smallest->buffer.size(); i < largest->buffer.size(); ++i) - out.buffer[i] = largest->buffer[i]; - } - - - void inplace_logicalor(const BoolArray & ba) { - logicalor(ba,*this); - } - - /** - * Computes the logical xor and writes to the provided BoolArray (out). 
- * The current bitmaps is unchanged. - */ - void logicalxor(const BoolArray & ba, BoolArray & out) { - const BoolArray * smallest; - const BoolArray * largest; - if(ba.buffer.size() > buffer.size()) { - smallest = this; - largest = &ba; - out.setToSize(ba); - } else { - smallest = &ba; - largest = this; - out.setToSize(*this); - } - for (size_t i = 0; i < smallest->buffer.size(); ++i) - out.buffer[i] = buffer[i] ^ ba.buffer[i]; - for (size_t i = smallest->buffer.size(); i < largest->buffer.size(); ++i) - out.buffer[i] = largest->buffer[i]; - } - - void inplace_logicalxor(const BoolArray & ba) { - logicalxor(ba,*this); - } - - /** - * Computes the logical not and writes to the provided BoolArray (out). - * The current bitmaps is unchanged. - */ - void logicalnot(BoolArray & out) { - out.setToSize(*this); - for (size_t i = 0; i < buffer.size(); ++i) - out.buffer[i] = ~buffer[i]; - out.clearBogusBits(); - } - + return (buffer[pos / wordinbits] & + (static_cast(1) << (pos % wordinbits))) != 0; + } - void inplace_logicalnot() { - for (size_t i = 0; i < buffer.size(); ++i) - buffer[i] = ~buffer[i]; - clearBogusBits(); - } + /** + * set all bits to 0 + */ + void reset() { + if (buffer.size() > 0) + memset(&buffer[0], 0, sizeof(uword) * buffer.size()); + sizeinbits = 0; + } + size_t sizeInBits() const { return sizeinbits; } - /** - * Returns the number of bits set to the value 1. - * The running time complexity is proportional to the - * size of the bitmap. - * - * This is sometimes called the cardinality. - */ - size_t numberOfOnes() const { - size_t count = 0; - for (size_t i = 0; i < buffer.size(); ++i) { - count += countOnes((uword) buffer[i]); - } - return count; - } + ~BoolArray() {} - inline void printout(ostream &o = cout) { - for (size_t k = 0; k < sizeinbits; ++k) - o << get(k) << " "; - o << endl; - } - - /** - * Make sure the two bitmaps have the same size (padding with zeroes - * if necessary). It has constant running time complexity. 
- */ - void makeSameSize(BoolArray & a) { - if (a.sizeinbits < sizeinbits) - a.padWithZeroes(sizeinbits); - else if (sizeinbits < a.sizeinbits) - padWithZeroes(a.sizeinbits); - } - /** - * Make sure the current bitmap has the size of the provided bitmap. + /** + * Computes the logical and and writes to the provided BoolArray (out). + * The current bitmaps is unchanged. + */ + void logicaland(const BoolArray &ba, BoolArray &out) const { + if (ba.buffer.size() < buffer.size()) + out.setToSize(ba); + else + out.setToSize(*this); + for (size_t i = 0; i < out.buffer.size(); ++i) + out.buffer[i] = buffer[i] & ba.buffer[i]; + } + + /** + * Computes the logical and and return the result. + * The current bitmaps is unchanged. + */ + BoolArray logicaland(const BoolArray &a) const { + BoolArray answer; + logicaland(a, answer); + return answer; + } + + void inplace_logicaland(const BoolArray &ba) { + if (ba.buffer.size() < buffer.size()) + setToSize(ba); + for (size_t i = 0; i < buffer.size(); ++i) + buffer[i] = buffer[i] & ba.buffer[i]; + } + + /** + * Computes the logical andnot and writes to the provided BoolArray (out). + * The current bitmaps is unchanged. + */ + void logicalandnot(const BoolArray &ba, BoolArray &out) const { + out.setToSize(*this); + size_t upto = out.buffer.size() < ba.buffer.size() ? out.buffer.size() + : ba.buffer.size(); + for (size_t i = 0; i < upto; ++i) + out.buffer[i] = buffer[i] & (~ba.buffer[i]); + for (size_t i = upto; i < out.buffer.size(); ++i) + out.buffer[i] = buffer[i]; + out.clearBogusBits(); + } + + /** + * Computes the logical andnot and return the result. + * The current bitmaps is unchanged. + */ + BoolArray logicalandnot(const BoolArray &a) const { + BoolArray answer; + logicalandnot(a, answer); + return answer; + } + + void inplace_logicalandnot(const BoolArray &ba) { + size_t upto = + buffer.size() < ba.buffer.size() ? 
buffer.size() : ba.buffer.size(); + for (size_t i = 0; i < upto; ++i) + buffer[i] = buffer[i] & (~ba.buffer[i]); + clearBogusBits(); + } + + /** + * Computes the logical or and writes to the provided BoolArray (out). + * The current bitmaps is unchanged. + */ + void logicalor(const BoolArray &ba, BoolArray &out) const { + const BoolArray *smallest; + const BoolArray *largest; + if (ba.buffer.size() > buffer.size()) { + smallest = this; + largest = &ba; + out.setToSize(ba); + } else { + smallest = &ba; + largest = this; + out.setToSize(*this); + } + for (size_t i = 0; i < smallest->buffer.size(); ++i) + out.buffer[i] = buffer[i] | ba.buffer[i]; + for (size_t i = smallest->buffer.size(); i < largest->buffer.size(); ++i) + out.buffer[i] = largest->buffer[i]; + } + + /** + * Computes the logical or and return the result. + * The current bitmaps is unchanged. + */ + BoolArray logicalor(const BoolArray &a) const { + BoolArray answer; + logicalor(a, answer); + return answer; + } + + void inplace_logicalor(const BoolArray &ba) { logicalor(ba, *this); } + + /** + * Computes the logical xor and writes to the provided BoolArray (out). + * The current bitmaps is unchanged. + */ + void logicalxor(const BoolArray &ba, BoolArray &out) const { + const BoolArray *smallest; + const BoolArray *largest; + if (ba.buffer.size() > buffer.size()) { + smallest = this; + largest = &ba; + out.setToSize(ba); + } else { + smallest = &ba; + largest = this; + out.setToSize(*this); + } + for (size_t i = 0; i < smallest->buffer.size(); ++i) + out.buffer[i] = buffer[i] ^ ba.buffer[i]; + for (size_t i = smallest->buffer.size(); i < largest->buffer.size(); ++i) + out.buffer[i] = largest->buffer[i]; + } + + /** + * Computes the logical xor and return the result. + * The current bitmaps is unchanged. 
+ */ + BoolArray logicalxor(const BoolArray &a) const { + BoolArray answer; + logicalxor(a, answer); + return answer; + } + + void inplace_logicalxor(const BoolArray &ba) { logicalxor(ba, *this); } + + /** + * Computes the logical not and writes to the provided BoolArray (out). + * The current bitmaps is unchanged. + */ + void logicalnot(BoolArray &out) const { + out.setToSize(*this); + for (size_t i = 0; i < buffer.size(); ++i) + out.buffer[i] = ~buffer[i]; + out.clearBogusBits(); + } + + /** + * Computes the logical not and return the result. + * The current bitmaps is unchanged. + */ + BoolArray logicalandnot() const { + BoolArray answer; + logicalnot(answer); + return answer; + } + + void inplace_logicalnot() { + for (size_t i = 0; i < buffer.size(); ++i) + buffer[i] = ~buffer[i]; + clearBogusBits(); + } + + /** + * Returns the number of bits set to the value 1. + * The running time complexity is proportional to the + * size of the bitmap. + * + * This is sometimes called the cardinality. */ - void setToSize(const BoolArray & a) { - sizeinbits = a.sizeinbits; - buffer.resize(a.buffer.size()); - } - - /** - * make sure the size of the array is totalbits bits by padding with zeroes. - * returns the number of words added (storage cost increase) - */ - size_t padWithZeroes(const size_t totalbits) { - size_t currentwordsize = (sizeinbits + wordinbits - 1) / wordinbits; - size_t neededwordsize = (totalbits + wordinbits - 1) / wordinbits; + size_t numberOfOnes() const { + size_t count = 0; + for (size_t i = 0; i < buffer.size(); ++i) { + count += countOnes(buffer[i]); + } + return count; + } + + inline void printout(std::ostream &o = std::cout) { + for (size_t k = 0; k < sizeinbits; ++k) + o << get(k) << " "; + o << std::endl; + } + + /** + * Make sure the two bitmaps have the same size (padding with zeroes + * if necessary). It has constant running time complexity. 
+ */ + void makeSameSize(BoolArray &a) { + if (a.sizeinbits < sizeinbits) + a.padWithZeroes(sizeinbits); + else if (sizeinbits < a.sizeinbits) + padWithZeroes(a.sizeinbits); + } + /** + * Make sure the current bitmap has the size of the provided bitmap. + */ + void setToSize(const BoolArray &a) { + sizeinbits = a.sizeinbits; + buffer.resize(a.buffer.size()); + } + + /** + * make sure the size of the array is totalbits bits by padding with zeroes. + * returns the number of words added (storage cost increase) + */ + size_t padWithZeroes(const size_t totalbits) { + size_t currentwordsize = (sizeinbits + wordinbits - 1) / wordinbits; + size_t neededwordsize = (totalbits + wordinbits - 1) / wordinbits; #ifdef EWAHASSERT - assert(neededwordsize >= currentwordsize); + assert(neededwordsize >= currentwordsize); #endif - buffer.resize(neededwordsize); - sizeinbits = totalbits; - return static_cast(neededwordsize - currentwordsize); - - } - - void append(const BoolArray & a); - - enum { - wordinbits = sizeof(uword) * 8 - }; - - vector toArray() const { - vector ans; - for (size_t k = 0; k < buffer.size(); ++k) { - uword myword = buffer[k]; - while (myword != 0) { - uint32_t ntz = numberOfTrailingZeros (myword); - ans.push_back(sizeof(uword) * 8 * k + ntz); - myword ^= (static_cast(1) << ntz); - } - } - return ans; - } - - /** - * Transform into a string that presents a list of set bits. - * The running time is linear in the size of the bitmap. 
- */ - operator string() const { - stringstream ss; - ss << *this; - return ss.str(); - - } + buffer.resize(neededwordsize); + sizeinbits = totalbits; + return static_cast(neededwordsize - currentwordsize); + } + + void append(const BoolArray &a); + + enum { wordinbits = sizeof(uword) * 8 }; + + std::vector toArray() const { + std::vector ans; + for (size_t k = 0; k < buffer.size(); ++k) { + uword myword = buffer[k]; + while (myword != 0) { + uint32_t ntz = numberOfTrailingZeros(myword); + ans.push_back(sizeof(uword) * 8 * k + ntz); + myword ^= (static_cast(1) << ntz); + } + } + return ans; + } + + /** + * Transform into a string that presents a list of set bits. + * The running time is linear in the size of the bitmap. + */ + operator std::string() const { + std::stringstream ss; + ss << *this; + return ss.str(); + } + + friend std::ostream &operator<<(std::ostream &out, const BoolArray &a) { + std::vector v = a.toArray(); + out << "{"; + for (std::vector::const_iterator i = v.begin(); i != v.end();) { + out << *i; + ++i; + if (i != v.end()) + out << ","; + } + out << "}"; + return out; + + return (out << static_cast(a)); + } - friend ostream& operator<< (ostream &out, const BoolArray &a) { - vector v = a.toArray(); - out <<"{"; - for (vector::const_iterator i = v.begin(); i != v.end(); ) { - out << *i; - ++i; - if( i != v.end()) - out << ","; - } - out <<"}"; - return out; - - return (out << static_cast(a)); - } private: - - void clearBogusBits() { - if((sizeinbits % wordinbits) != 0) { - const uword maskbogus = (static_cast(1) << (sizeinbits % wordinbits)) - 1; - buffer[buffer.size() - 1] &= maskbogus; - } + void clearBogusBits() { + if ((sizeinbits % wordinbits) != 0) { + const uword maskbogus = + (static_cast(1) << (sizeinbits % wordinbits)) - 1; + buffer[buffer.size() - 1] &= maskbogus; } + } - vector buffer; - size_t sizeinbits; + std::vector buffer; + size_t sizeinbits; }; -template -void BoolArray::append(const BoolArray & a) { - if (sizeinbits % 
wordinbits == 0) { - buffer.insert(buffer.end(), a.buffer.begin(), a.buffer.end()); - } else { - throw invalid_argument("Cannot append if parent does not meet boundary"); - } - sizeinbits += a.sizeinbits; +/** + * computes the logical or (union) between "n" bitmaps (referenced by a + * pointer). + * The answer gets written out in container. This might be faster than calling + * logicalor n-1 times. + */ +template +void fast_logicalor_tocontainer(size_t n, const BoolArray **inputs, + BoolArray &container) { + if (n == 0) { + container.reset(); + return; + } + container = *inputs[0]; + for (size_t i = 0; i < n; i++) { + container.inplace_logicalor(*inputs[i]); + } +} + +/** + * computes the logical or (union) between "n" bitmaps (referenced by a + * pointer). + * Returns the answer. This might be faster than calling + * logicalor n-1 times. + */ +template +BoolArray fast_logicalor(size_t n, const BoolArray **inputs) { + BoolArray answer; + fast_logicalor_tocontainer(n, inputs, answer); + return answer; +} + +template void BoolArray::append(const BoolArray &a) { + if (sizeinbits % wordinbits == 0) { + buffer.insert(buffer.end(), a.buffer.begin(), a.buffer.end()); + } else { + throw std::invalid_argument( + "Cannot append if parent does not meet boundary"); + } + sizeinbits += a.sizeinbits; } #endif diff --git a/yt/utilities/lib/ewahboolarray/ewah.h b/yt/utilities/lib/ewahboolarray/ewah.h index 96d780cf3cd..2f733cc0bf2 100644 --- a/yt/utilities/lib/ewahboolarray/ewah.h +++ b/yt/utilities/lib/ewahboolarray/ewah.h @@ -3,6 +3,7 @@ * Apache License Version 2.0 http://www.apache.org/licenses/. * * (c) Daniel Lemire, http://lemire.me/en/ + * with contributions from Zarian Waheed and others. 
*/ #ifndef EWAH_H @@ -10,24 +11,20 @@ #include #include +#include + #include "ewahutil.h" #include "boolarray.h" #include "runninglengthword.h" -using namespace std; - -template -class EWAHBoolArrayIterator; +template class EWAHBoolArrayIterator; -template -class EWAHBoolArraySetBitForwardIterator; +template class EWAHBoolArraySetBitForwardIterator; class BitmapStatistics; -template -class EWAHBoolArrayRawIterator; - +template class EWAHBoolArrayRawIterator; /** * This class is a compressed bitmap. @@ -35,739 +32,916 @@ class EWAHBoolArrayRawIterator; * happens. * The underlying data structure is an STL vector. */ -template -class EWAHBoolArray { +template class EWAHBoolArray { public: - EWAHBoolArray() : - buffer(1, 0), sizeinbits(0), lastRLW(0) { - } - - static EWAHBoolArray bitmapOf(size_t n, ...) { - EWAHBoolArray ans; - va_list vl; - va_start(vl, n); - for (size_t i = 0; i < n; i++) { - ans.set(static_cast(va_arg(vl, int))); - } - va_end(vl); - return ans; - } - - /** - * Query the value of bit i. This runs in time proportional to - * the size of the bitmap. This is not meant to be use in - * a performance-sensitive context. - * - * (This implementation is based on zhenjl's Go version of JavaEWAH.) - * - */ - bool get(const size_t pos) const { - if ( pos >= static_cast(sizeinbits) ) - return false; - const size_t wordpos = pos / wordinbits; - size_t WordChecked = 0; - EWAHBoolArrayRawIterator j = raw_iterator(); - while(j.hasNext()) { - BufferedRunningLengthWord & rle = j.next(); - WordChecked += static_cast( rle.getRunningLength()); - if(wordpos < WordChecked) - return rle.getRunningBit(); - if(wordpos < WordChecked + rle.getNumberOfLiteralWords() ) { - const uword w = j.dirtyWords()[wordpos - WordChecked]; - return (w & (static_cast(1) << (pos % wordinbits))) != 0; - } - WordChecked += static_cast(rle.getNumberOfLiteralWords()); - } - return false; - } - - - /** - * Set the ith bit to true (starting at zero). - * Auto-expands the bitmap. 
It has constant running time complexity. - * Note that you must set the bits in increasing order: - * set(1), set(2) is ok; set(2), set(1) is not ok. - * set(100), set(100) is also not ok. - * - * Note: by design EWAH is not an updatable data structure in - * the sense that once bit 1000 is set, you cannot change the value - * of bits 0 to 1000. - * - * Returns true if the value of the bit was changed, and false otherwise. - * (In practice, if you set the bits in strictly increasing order, it - * should always return true.) - */ - bool set(size_t i); - - /** - * Transform into a string that presents a list of set bits. - * The running time is linear in the compressed size of the bitmap. - */ - operator string() const { - stringstream ss; - ss << *this; - return ss.str(); - } - friend ostream& operator<< (ostream &out, const EWAHBoolArray &a) { - - out<<"{"; - for (EWAHBoolArray::const_iterator i = a.begin(); i != a.end(); ) { - out<<*i; - ++i; - if( i != a.end()) - out << ","; - } - out <<"}"; - - return out; - } - /** - * Make sure the two bitmaps have the same size (padding with zeroes - * if necessary). It has constant running time complexity. - * - * This is useful when calling "logicalnot" functions. - * - * This can an adverse effect of performance, especially when computing - * intersections. - */ - void makeSameSize(EWAHBoolArray & a) { - if (a.sizeinbits < sizeinbits) - a.padWithZeroes(sizeinbits); - else if (sizeinbits < a.sizeinbits) - padWithZeroes(a.sizeinbits); - } - - enum { - RESERVEMEMORY = true - }; // for speed - - typedef EWAHBoolArraySetBitForwardIterator const_iterator; - - /** - * Returns an iterator that can be used to access the position of the - * set bits. The running time complexity of a full scan is proportional to the number - * of set bits: be aware that if you have long strings of 1s, this can be - * very inefficient. - * - * It can be much faster to use the toArray method if you want to - * retrieve the set bits. 
- */ - const_iterator begin() const { - return EWAHBoolArraySetBitForwardIterator (buffer); - } + EWAHBoolArray() : buffer(1, 0), sizeinbits(0), lastRLW(0) {} - /** - * Basically a bogus iterator that can be used together with begin() - * for constructions such as for(EWAHBoolArray::iterator i = b.begin(); i!=b.end(); ++i) {} - */ - const_iterator end() const { - return EWAHBoolArraySetBitForwardIterator (buffer, buffer.size()); + static EWAHBoolArray bitmapOf(size_t n, ...) { + EWAHBoolArray ans; + va_list vl; + va_start(vl, n); + for (size_t i = 0; i < n; i++) { + ans.set(static_cast(va_arg(vl, int))); } - - /** - * Retrieve the set bits. Can be much faster than iterating through - * the set bits with an iterator. - */ - vector toArray() const; - - /** - * computes the logical and with another compressed bitmap - * answer goes into container - * Running time complexity is proportional to the sum of the compressed - * bitmap sizes. - */ - void logicaland(const EWAHBoolArray &a, EWAHBoolArray &container) const; - - /** - * tests whether the bitmaps "intersect" (have at least one 1-bit at the same - * position). This function does not modify the existing bitmaps. - * It is faster than calling logicaland. - */ - bool intersects(const EWAHBoolArray &a) const; - - /** - * computes the logical or with another compressed bitmap - * answer goes into container - * Running time complexity is proportional to the sum of the compressed - * bitmap sizes. - */ - void logicalor(const EWAHBoolArray &a, EWAHBoolArray &container) const; - - - /** - * computes the logical xor with another compressed bitmap - * answer goes into container - * Running time complexity is proportional to the sum of the compressed - * bitmap sizes. - */ - void logicalxor(const EWAHBoolArray &a, EWAHBoolArray &container) const; - - /** - * clear the content of the bitmap. It does not - * release the memory. 
- */ - void reset() { - buffer.clear(); - buffer.push_back(0); - sizeinbits = 0; - lastRLW = 0; - } - - /** - * convenience method. - * - * returns the number of words added (storage cost increase) - */ - inline size_t addWord(const uword newdata, - const uint32_t bitsthatmatter = 8 * sizeof(uword)); - - inline void printout(ostream &o = cout) { - toBoolArray().printout(o); - } - - /** - * Prints a verbose description of the content of the compressed bitmap. - */ - void debugprintout() const; - - /** - * Return the size in bits of this bitmap (this refers - * to the uncompressed size in bits). - * - * You can increase it with padWithZeroes() - */ - inline size_t sizeInBits() const { - return sizeinbits; - } - - - /** - * Return the size of the buffer in bytes. This - * is equivalent to the storage cost, minus some overhead. - */ - inline size_t sizeInBytes() const { - return buffer.size() * sizeof(uword); - } - - /** - * same as addEmptyWord, but you can do several in one shot! - * returns the number of words added (storage cost increase) - */ - size_t addStreamOfEmptyWords(const bool v, size_t number); - - /** - * add a stream of dirty words, returns the number of words added - * (storage cost increase) - */ - size_t addStreamOfDirtyWords(const uword * v, const size_t number); - - /** - * add a stream of dirty words, each one negated, returns the number of words added - * (storage cost increase) - */ - size_t addStreamOfNegatedDirtyWords(const uword * v, const size_t number); - - - /** - * make sure the size of the array is totalbits bits by padding with zeroes. - * returns the number of words added (storage cost increase). - * - * This is useful when calling "logicalnot" functions. - * - * This can an adverse effect of performance, especially when computing - * intersections. - * - */ - size_t padWithZeroes(const size_t totalbits); - - /** - * Compute the size on disk assuming that it was saved using - * the method "save". 
- */ - size_t sizeOnDisk() const; - - /** - * Save this bitmap to a stream. The file format is - * | sizeinbits | buffer lenth | buffer content| - * the sizeinbits part can be omitted if "savesizeinbits=false". - * Both sizeinbits and buffer length are saved using the size_t data - * type which is typically a 32-bit unsigned integer for 32-bit CPUs - * and a 64-bit unsigned integer for 64-bit CPUs. - * Note that this format is machine-specific. Note also - * that the word size is not saved. For robust persistent - * storage, you need to save this extra information elsewhere. - */ - void write(ostream & out, const bool savesizeinbits = true) const; - - /** - * This only writes the content of the buffer (see write()) method. - * It is for advanced users. - */ - void writeBuffer(ostream & out) const; - - /** - * size (in words) of the underlying STL vector. - */ - size_t bufferSize() const { - return buffer.size(); - } - - /** - * this is the counterpart to the write method. - * if you set savesizeinbits=false, then you are responsible - * for setting the value fo the attribute sizeinbits (see method setSizeInBits). - */ - void read(istream & in, const bool savesizeinbits = true); - - /** - * read the buffer from a stream, see method writeBuffer. - * this is for advanced users. - */ - void readBuffer(istream & in, const size_t buffersize); - - - /** - * We define two EWAHBoolArray as being equal if they have the same set bits. - * Alternatively, B1==B2 if and only if cardinality(B1 XOR B2) ==0. - */ - bool operator==(const EWAHBoolArray & x) const; - - /** - * We define two EWAHBoolArray as being different if they do not have the same set bits. - * Alternatively, B1!=B2 if and only if cardinality(B1 XOR B2) >0. - */ - bool operator!=(const EWAHBoolArray & x) const; - - bool operator==(const BoolArray & x) const; - - bool operator!=(const BoolArray & x) const; - - /** - * Iterate over the uncompressed words. - * Can be considerably faster than begin()/end(). 
- * Running time complexity of a full scan is proportional to the - * uncompressed size of the bitmap. - */ - EWAHBoolArrayIterator uncompress() const ; - - /** - * To iterate over the compressed data. - * Can be faster than any other iterator. - * Running time complexity of a full scan is proportional to the - * compressed size of the bitmap. - */ - EWAHBoolArrayRawIterator raw_iterator() const ; - - /** - * Appends the content of some other compressed bitmap - * at the end of the current bitmap. - */ - void append(const EWAHBoolArray & x); - - /** - * For research purposes. This computes the number of - * dirty words and the number of compressed words. - */ - BitmapStatistics computeStatistics() const; - - /** - * For convenience, this fully uncompresses the bitmap. - * Not fast! - */ - BoolArray toBoolArray() const; - - /** - * Convert to a list of positions of "set" bits. - * The recommended container is vector. - * - * See also toVector(). - */ - template - void appendRowIDs(container & out, const size_t offset = 0) const; - - /** - * Convert to a list of positions of "set" bits. - * The recommended container is vector. - * (alias for appendRowIDs). - * - * See also toVector(). - */ - template - void appendSetBits(container & out, const size_t offset = 0) const { - return appendRowIDs(out, offset); - } - - /** - * Returns a vector containing the position of the set - * bits in increasing order. - */ - vector toVector() { - vector answer; - appendSetBits(answer); - return answer; - } - - /** - * Returns the number of bits set to the value 1. - * The running time complexity is proportional to the - * compressed size of the bitmap. - * - * This is sometimes called the cardinality. - */ - size_t numberOfOnes() const; - - /** - * Swap the content of this bitmap with another bitmap. - * No copying is done. (Running time complexity is constant.) 
- */ - void swap(EWAHBoolArray & x); - - const vector & getBuffer() const { - return buffer; - } - ; - enum { - wordinbits = sizeof(uword) * 8 - }; - - /** - *Please don't copy your bitmaps! The running time - * complexity of a copy is the size of the compressed bitmap. - **/ - EWAHBoolArray(const EWAHBoolArray& other) : - buffer(other.buffer), sizeinbits(other.sizeinbits), - lastRLW(other.lastRLW) { - } - - /** - * Copies the content of one bitmap onto another. Running time complexity - * is proportional to the size of the compressed bitmap. - * please, never hard-copy this object. Use the swap method if you must. - */ - EWAHBoolArray & operator=(const EWAHBoolArray & x) { - buffer = x.buffer; - sizeinbits = x.sizeinbits; - lastRLW = x.lastRLW; - return *this; - } - - /** - * This is equivalent to the operator =. It is used - * to keep in mind that assignment can be expensive. - * - *if you don't care to copy the bitmap (performance-wise), use this! - */ - void expensive_copy(const EWAHBoolArray & x) { - buffer = x.buffer; - sizeinbits = x.sizeinbits; - lastRLW = x.lastRLW; + va_end(vl); + return ans; + } + + /** + * Recover wasted memory usage. Fit buffers to the actual data. + */ + void trim() { buffer.shrink_to_fit(); } + + /** + * Query the value of bit i. This runs in time proportional to + * the size of the bitmap. This is not meant to be use in + * a performance-sensitive context. + * + * (This implementation is based on zhenjl's Go version of JavaEWAH.) 
+ * + */ + bool get(const size_t pos) const { + if (pos >= static_cast(sizeinbits)) + return false; + const size_t wordpos = pos / wordinbits; + size_t WordChecked = 0; + EWAHBoolArrayRawIterator j = raw_iterator(); + while (j.hasNext()) { + BufferedRunningLengthWord &rle = j.next(); + WordChecked += static_cast(rle.getRunningLength()); + if (wordpos < WordChecked) + return rle.getRunningBit(); + if (wordpos < WordChecked + rle.getNumberOfLiteralWords()) { + const uword w = j.dirtyWords()[wordpos - WordChecked]; + return (w & (static_cast(1) << (pos % wordinbits))) != 0; + } + WordChecked += static_cast(rle.getNumberOfLiteralWords()); } + return false; + } + + /** + * Returns true if no bit is set. + */ + bool empty() const { + size_t pointer(0); + while (pointer < buffer.size()) { + ConstRunningLengthWord rlw(buffer[pointer]); + if (rlw.getRunningBit()) { + if(rlw.getRunningLength() > 0) return false; + } + ++pointer; + for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) { + if(buffer[pointer] != 0) return false; + ++pointer; + } + } + return true; + } + + + /** + * Set the ith bit to true (starting at zero). + * Auto-expands the bitmap. It has constant running time complexity. + * Note that you must set the bits in increasing order: + * set(1), set(2) is ok; set(2), set(1) is not ok. + * set(100), set(100) is also not ok. + * + * Note: by design EWAH is not an updatable data structure in + * the sense that once bit 1000 is set, you cannot change the value + * of bits 0 to 1000. + * + * Returns true if the value of the bit was changed, and false otherwise. + * (In practice, if you set the bits in strictly increasing order, it + * should always return true.) + */ + bool set(size_t i); + + /** + * Transform into a string that presents a list of set bits. + * The running time is linear in the compressed size of the bitmap. 
+ */
+ operator std::string() const {
+ std::stringstream ss;
+ ss << *this;
+ return ss.str();
+ }
+ friend std::ostream &operator<<(std::ostream &out, const EWAHBoolArray &a) {
+
+ out << "{";
+ for (EWAHBoolArray::const_iterator i = a.begin(); i != a.end();) {
+ out << *i;
+ ++i;
+ if (i != a.end())
+ out << ",";
+ }
+ out << "}";
+
+ return out;
+ }
+ /**
+ * Make sure the two bitmaps have the same size (padding with zeroes
+ * if necessary). It has constant running time complexity.
+ *
+ * This is useful when calling "logicalnot" functions.
+ *
+ * This can have an adverse effect on performance, especially when computing
+ * intersections.
+ */
+ void makeSameSize(EWAHBoolArray &a) {
+ if (a.sizeinbits < sizeinbits)
+ a.padWithZeroes(sizeinbits);
+ else if (sizeinbits < a.sizeinbits)
+ padWithZeroes(a.sizeinbits);
+ }
+
+ enum { RESERVEMEMORY = true }; // for speed
+
+ typedef EWAHBoolArraySetBitForwardIterator const_iterator;
+
+ /**
+ * Returns an iterator that can be used to access the position of the
+ * set bits. The running time complexity of a full scan is proportional to the
+ * number
+ * of set bits: be aware that if you have long strings of 1s, this can be
+ * very inefficient.
+ *
+ * It can be much faster to use the toArray method if you want to
+ * retrieve the set bits.
+ */
+ const_iterator begin() const {
+ return EWAHBoolArraySetBitForwardIterator(&buffer);
+ }
+
+ /**
+ * Basically a bogus iterator that can be used together with begin()
+ * for constructions such as for(EWAHBoolArray::iterator i = b.begin();
+ * i!=b.end(); ++i) {}
+ */
+ const_iterator &end() const {
+ return EWAHBoolArraySetBitForwardIterator::end();
+ }
+
+ /**
+ * Retrieve the set bits. Can be much faster than iterating through
+ * the set bits with an iterator. 
+ */ + std::vector toArray() const; + + /** + * computes the logical and with another compressed bitmap + * answer goes into container + * Running time complexity is proportional to the sum of the compressed + * bitmap sizes. + * + * The sizeInBits() of the result is equal to the maximum that of the current + * bitmap's sizeInBits() and that of a.sizeInBits(). + */ + void logicaland(const EWAHBoolArray &a, EWAHBoolArray &container) const; + + /** + * computes the logical and with another compressed bitmap + * Return the answer + * Running time complexity is proportional to the sum of the compressed + * bitmap sizes. + * + * The sizeInBits() of the result is equal to the maximum that of the current + * bitmap's sizeInBits() and that of a.sizeInBits(). + */ + EWAHBoolArray logicaland(const EWAHBoolArray &a) const { + EWAHBoolArray answer; + logicaland(a, answer); + return answer; + } + + /** + * calls logicaland + */ + EWAHBoolArray operator&(const EWAHBoolArray &a) const { + return logicaland(a); + } + + /** + * computes the logical and with another compressed bitmap + * answer goes into container + * Running time complexity is proportional to the sum of the compressed + * bitmap sizes. + * + * The sizeInBits() of the result should be equal to that of the current + * bitmap irrespective of a.sizeInBits(). + * + */ + void logicalandnot(const EWAHBoolArray &a, EWAHBoolArray &container) const; + + /** + * calls logicalandnot + */ + EWAHBoolArray operator-(const EWAHBoolArray &a) const { + return logicalandnot(a); + } + + /** + * computes the logical and not with another compressed bitmap + * Return the answer + * Running time complexity is proportional to the sum of the compressed + * bitmap sizes. + * + * The sizeInBits() of the result should be equal to that of the current + * bitmap irrespective of a.sizeInBits(). 
+ * + */ + EWAHBoolArray logicalandnot(const EWAHBoolArray &a) const { + EWAHBoolArray answer; + logicalandnot(a, answer); + return answer; + } + + /** + * tests whether the bitmaps "intersect" (have at least one 1-bit at the same + * position). This function does not modify the existing bitmaps. + * It is faster than calling logicaland. + */ + bool intersects(const EWAHBoolArray &a) const; + + /** + * computes the logical or with another compressed bitmap + * answer goes into container + * Running time complexity is proportional to the sum of the compressed + * bitmap sizes. + * + * If you have many bitmaps, see fast_logicalor_tocontainer. + * + * The sizeInBits() of the result is equal to the maximum that of the current + * bitmap's sizeInBits() and that of a.sizeInBits(). + */ + void logicalor(const EWAHBoolArray &a, EWAHBoolArray &container) const; + + /** + * computes the size (in number of set bits) of the logical or with another + * compressed bitmap + * Running time complexity is proportional to the sum of the compressed + * bitmap sizes. + */ + size_t logicalorcount(const EWAHBoolArray &a) const; + + /** + * computes the size (in number of set bits) of the logical and with another + * compressed bitmap + * Running time complexity is proportional to the sum of the compressed + * bitmap sizes. + */ + size_t logicalandcount(const EWAHBoolArray &a) const; + + /** + * computes the size (in number of set bits) of the logical and not with + * another compressed bitmap + * Running time complexity is proportional to the sum of the compressed + * bitmap sizes. + */ + size_t logicalandnotcount(const EWAHBoolArray &a) const; + + /** + * computes the size (in number of set bits) of the logical xor with another + * compressed bitmap + * Running time complexity is proportional to the sum of the compressed + * bitmap sizes. 
+ */ + size_t logicalxorcount(const EWAHBoolArray &a) const; + + /** + * computes the logical or with another compressed bitmap + * Return the answer + * Running time complexity is proportional to the sum of the compressed + * bitmap sizes. + * + * If you have many bitmaps, see fast_logicalor. + * + * The sizeInBits() of the result is equal to the maximum that of the current + * bitmap's sizeInBits() and that of a.sizeInBits(). + */ + EWAHBoolArray logicalor(const EWAHBoolArray &a) const { + EWAHBoolArray answer; + logicalor(a, answer); + return answer; + } + + /** + * calls logicalor + */ + EWAHBoolArray operator|(const EWAHBoolArray &a) const { return logicalor(a); } + + /** + * computes the logical xor with another compressed bitmap + * answer goes into container + * Running time complexity is proportional to the sum of the compressed + * bitmap sizes. + * + * The sizeInBits() of the result is equal to the maximum that of the current + * bitmap's sizeInBits() and that of a.sizeInBits(). + */ + void logicalxor(const EWAHBoolArray &a, EWAHBoolArray &container) const; + + /** + * computes the logical xor with another compressed bitmap + * Return the answer + * Running time complexity is proportional to the sum of the compressed + * bitmap sizes. + * + * The sizeInBits() of the result is equal to the maximum that of the current + * bitmap's sizeInBits() and that of a.sizeInBits(). + */ + EWAHBoolArray logicalxor(const EWAHBoolArray &a) const { + EWAHBoolArray answer; + logicalxor(a, answer); + return answer; + } + + /** + * calls logicalxor + */ + EWAHBoolArray operator^(const EWAHBoolArray &a) const { + return logicalxor(a); + } + /** + * clear the content of the bitmap. It does not + * release the memory. + */ + void reset() { + buffer.clear(); + buffer.push_back(0); + sizeinbits = 0; + lastRLW = 0; + } + + /** + * convenience method. 
+ *
+ * returns the number of words added (storage cost increase)
+ */
+ inline size_t addWord(const uword newdata,
+ const uint32_t bitsthatmatter = 8 * sizeof(uword));
+
+ inline void printout(std::ostream &o = std::cout) {
+ toBoolArray().printout(o);
+ }
+
+ /**
+ * Prints a verbose description of the content of the compressed bitmap.
+ */
+ void debugprintout() const;
+
+ /**
+ * Return the size in bits of this bitmap (this refers
+ * to the uncompressed size in bits).
+ *
+ * You can increase it with padWithZeroes()
+ */
+ inline size_t sizeInBits() const { return sizeinbits; }
+
+ /**
+ * Return the size of the buffer in bytes. This
+ * is equivalent to the storage cost, minus some overhead.
+ * See sizeOnDisk to get the actual storage cost with overhead.
+ */
+ inline size_t sizeInBytes() const { return buffer.size() * sizeof(uword); }
+
+ /**
+ * same as addEmptyWord, but you can do several in one shot!
+ * returns the number of words added (storage cost increase)
+ */
+ size_t addStreamOfEmptyWords(const bool v, size_t number);
+
+ /**
+ * add a stream of dirty words, returns the number of words added
+ * (storage cost increase)
+ */
+ size_t addStreamOfDirtyWords(const uword *v, const size_t number);
+
+ /**
+ * add a stream of dirty words, each one negated, returns the number of words
+ * added
+ * (storage cost increase)
+ */
+ size_t addStreamOfNegatedDirtyWords(const uword *v, const size_t number);
+
+ /**
+ * make sure the size of the array is totalbits bits by padding with zeroes.
+ * returns the number of words added (storage cost increase).
+ *
+ * This is useful when calling "logicalnot" functions.
+ *
+ * This can have an adverse effect on performance, especially when computing
+ * intersections.
+ *
+ */
+ size_t padWithZeroes(const size_t totalbits);
+
+ /**
+ * Compute the size on disk assuming that it was saved using
+ * the method "write". 
+ */
+ size_t sizeOnDisk(const bool savesizeinbits = true) const;
+
+ /**
+ * Save this bitmap to a stream. The file format is
+ * | sizeinbits | buffer length | buffer content|
+ * the sizeinbits part can be omitted if "savesizeinbits=false".
+ * Both sizeinbits and buffer length are saved using the size_t data
+ * type which is typically a 32-bit unsigned integer for 32-bit CPUs
+ * and a 64-bit unsigned integer for 64-bit CPUs.
+ * Note that this format is machine-specific. Note also
+ * that the word size is not saved. For robust persistent
+ * storage, you need to save this extra information elsewhere.
+ *
+ * Returns how many bytes were handed out to the stream.
+ */
+ size_t write(std::ostream &out, const bool savesizeinbits = true) const;
+
+ /**
+ * same as write(std::ostream...), except that you provide a char pointer
+ * and a "capacity" (in bytes). The function never writes at or beyond "out+capacity".
+ * If the storage needed exceeds the
+ * given capacity, the value zero is returned: it should be considered an error.
+ * Otherwise, the number of bytes copied is returned.
+ */
+ size_t write(char * out, size_t capacity, const bool savesizeinbits = true) const;
+
+ /**
+ * This only writes the content of the buffer (see write()) method.
+ * It is for advanced users.
+ */
+ void writeBuffer(std::ostream &out) const;
+
+ /**
+ * size (in words) of the underlying STL vector.
+ */
+ size_t bufferSize() const { return buffer.size(); }
+
+ /**
+ * this is the counterpart to the write method.
+ * if you set savesizeinbits=false, then you are responsible
+ * for setting the value of the attribute sizeinbits (see method
+ * setSizeInBits).
+ *
+ * Returns how many bytes were queried from the stream.
+ */
+ size_t read(std::istream &in, const bool savesizeinbits = true);
+
+
+ /**
+ * same as read(std::istream...), except that you provide a char pointer
+ * and a "capacity" (in bytes). The function never reads at or beyond "in+capacity". 
+ * If the detected storage exceeds the given capacity, the value zero is returned: + * it should be considered an error. + * Otherwise, the number of bytes read is returned. + */ + size_t read(const char * in, size_t capacity, const bool savesizeinbits = true); + + /** + * read the buffer from a stream, see method writeBuffer. + * this is for advanced users. + */ + void readBuffer(std::istream &in, const size_t buffersize); + + /** + * We define two EWAHBoolArray as being equal if they have the same set bits. + * Alternatively, B1==B2 if and only if cardinality(B1 XOR B2) ==0. + */ + bool operator==(const EWAHBoolArray &x) const; + + /** + * We define two EWAHBoolArray as being different if they do not have the same + * set bits. + * Alternatively, B1!=B2 if and only if cardinality(B1 XOR B2) >0. + */ + bool operator!=(const EWAHBoolArray &x) const; + + bool operator==(const BoolArray &x) const; + + bool operator!=(const BoolArray &x) const; + + /** + * Iterate over the uncompressed words. + * Can be considerably faster than begin()/end(). + * Running time complexity of a full scan is proportional to the + * uncompressed size of the bitmap. + */ + EWAHBoolArrayIterator uncompress() const; + + /** + * To iterate over the compressed data. + * Can be faster than any other iterator. + * Running time complexity of a full scan is proportional to the + * compressed size of the bitmap. + */ + EWAHBoolArrayRawIterator raw_iterator() const; + + /** + * Appends the content of some other compressed bitmap + * at the end of the current bitmap. + */ + void append(const EWAHBoolArray &x); + + /** + * For research purposes. This computes the number of + * dirty words and the number of compressed words. + */ + BitmapStatistics computeStatistics() const; + + /** + * For convenience, this fully uncompresses the bitmap. + * Not fast! + */ + BoolArray toBoolArray() const; + + /** + * Convert to a list of positions of "set" bits. + * The recommended container is vector. 
+ * + * See also toArray(). + */ + template + void appendRowIDs(container &out, const size_t offset = 0) const; + + /** + * Convert to a list of positions of "set" bits. + * The recommended container is vector. + * (alias for appendRowIDs). + * + * See also toArray(). + */ + template + void appendSetBits(container &out, const size_t offset = 0) const { + return appendRowIDs(out, offset); + } + + /** + * Returns a vector containing the position of the set + * bits in increasing order. This just calls "toArray". + */ + std::vector toVector() const { return toArray(); } + + /** + * Returns the number of bits set to the value 1. + * The running time complexity is proportional to the + * compressed size of the bitmap. + * + * This is sometimes called the cardinality. + */ + size_t numberOfOnes() const; + + /** + * Swap the content of this bitmap with another bitmap. + * No copying is done. (Running time complexity is constant.) + */ + void swap(EWAHBoolArray &x); + + const std::vector &getBuffer() const { return buffer; } + + enum { wordinbits = sizeof(uword) * 8 }; + + /** + * Please don't copy your bitmaps! The running time + * complexity of a copy is the size of the compressed bitmap. + **/ + EWAHBoolArray(const EWAHBoolArray &other) + : buffer(other.buffer), sizeinbits(other.sizeinbits), + lastRLW(other.lastRLW) {} + + /** + * Copies the content of one bitmap onto another. Running time complexity + * is proportional to the size of the compressed bitmap. + * please, never hard-copy this object. Use the swap method if you must. + */ + EWAHBoolArray &operator=(const EWAHBoolArray &x) { + buffer = x.buffer; + sizeinbits = x.sizeinbits; + lastRLW = x.lastRLW; + return *this; + } + + /** + * Move constructor. + */ + EWAHBoolArray(EWAHBoolArray &&other) + : buffer(std::move(other.buffer)), sizeinbits(other.sizeinbits), + lastRLW(other.lastRLW) {} + + /** + * Move assignment operator. 
+ */
+ EWAHBoolArray &operator=(EWAHBoolArray &&x) {
+ buffer = std::move(x.buffer);
+ sizeinbits = x.sizeinbits;
+ lastRLW = x.lastRLW;
+ return *this;
+ }
+
+ /**
+ * This is equivalent to the operator =. It is used
+ * to keep in mind that assignment can be expensive.
+ *
+ *if you don't care to copy the bitmap (performance-wise), use this!
+ */
+ void expensive_copy(const EWAHBoolArray &x) {
+ buffer = x.buffer;
+ sizeinbits = x.sizeinbits;
+ lastRLW = x.lastRLW;
+ }
+
+ /**
+ * Write the logical not of this bitmap in the provided container.
+ *
+ * This function takes into account the sizeInBits value.
+ * You may need to call "padWithZeroes" to adjust the sizeInBits.
+ */
+ void logicalnot(EWAHBoolArray &x) const;
+
+ /**
+ * Write the logical not of this bitmap in the provided container.
+ *
+ * This function takes into account the sizeInBits value.
+ * You may need to call "padWithZeroes" to adjust the sizeInBits.
+ */
+ EWAHBoolArray logicalnot() const {
+ EWAHBoolArray answer;
+ logicalnot(answer);
+ return answer;
+ }
+
+ /**
+ * Apply the logical not operation on this bitmap.
+ * Running time complexity is proportional to the compressed size of the
+ *bitmap.
+ * The current bitmap is not modified.
+ *
+ * This function takes into account the sizeInBits value.
+ * You may need to call "padWithZeroes" to adjust the sizeInBits.
+ **/
+ void inplace_logicalnot();
+
+ /**
+ * set size in bits. This does not affect the compressed size. It
+ * runs in constant time. This should not normally be used, except
+ * as part of a deserialization process.
+ */
+ inline void setSizeInBits(const size_t size) { sizeinbits = size; }
+
+ /**
+ * Like addStreamOfEmptyWords but
+ * does not return the cost increase,
+ * does not update sizeinbits
+ */
+ inline void fastaddStreamOfEmptyWords(const bool v, size_t number);
+ /**
+ * Like addStreamOfDirtyWords but does not return the cost increase,
+ * does not update sizeinbits. 
+ */ + inline void fastaddStreamOfDirtyWords(const uword *v, const size_t number); - /** - * Write the logical not of this bitmap in the provided container. - * - * This function takes into account the sizeInBits value. - * You may need to call "padWithZeroes" to adjust the sizeInBits. - */ - void logicalnot(EWAHBoolArray & x) const; - - /** - * Apply the logical not operation on this bitmap. - * Running time complexity is proportional to the compressed size of the bitmap. - * The current bitmap is not modified. - * - * This function takes into account the sizeInBits value. - * You may need to call "padWithZeroes" to adjust the sizeInBits. - **/ - void inplace_logicalnot(); - - /** - * set size in bits. This does not affect the compressed size. It - * runs in constant time. This should not normally be used, except - * as part of a deserialization process. - */ - inline void setSizeInBits(const size_t size) { - sizeinbits = size; - } private: - - // addStreamOfEmptyWords but does not return the cost increase, - // does not update sizeinbits and does not check that number>0 - void fastaddStreamOfEmptyWords(const bool v, size_t number); - - // private because does not increment the size in bits - // returns the number of words added (storage cost increase) - inline size_t addLiteralWord(const uword newdata); - - // private because does not increment the size in bits - // returns the number of words added (storage cost increase) - size_t addEmptyWord(const bool v); - // this second version "might" be faster if you hate OOP. - // in my tests, it turned out to be slower! 
- // private because does not increment the size in bits - //inline void addEmptyWordStaticCalls(bool v); - - vector buffer; - size_t sizeinbits; - size_t lastRLW; + // private because does not increment the size in bits + // returns the number of words added (storage cost increase) + inline size_t addLiteralWord(const uword newdata); + + // private because does not increment the size in bits + // returns the number of words added (storage cost increase) + size_t addEmptyWord(const bool v); + // this second version "might" be faster if you hate OOP. + // in my tests, it turned out to be slower! + // private because does not increment the size in bits + // inline void addEmptyWordStaticCalls(bool v); + + std::vector buffer; + size_t sizeinbits; + size_t lastRLW; }; +/** + * computes the logical or (union) between "n" bitmaps (referenced by a + * pointer). + * The answer gets written out in container. This might be faster than calling + * logicalor n-1 times. + */ +template +void fast_logicalor_tocontainer(size_t n, const EWAHBoolArray **inputs, + EWAHBoolArray &container); + +/** + * computes the logical or (union) between "n" bitmaps (referenced by a + * pointer). + * Returns the answer. This might be faster than calling + * logicalor n-1 times. + */ +template +EWAHBoolArray fast_logicalor(size_t n, + const EWAHBoolArray **inputs) { + EWAHBoolArray answer; + fast_logicalor_tocontainer(n, inputs, answer); + return answer; +} + /** * Iterate over words of bits from a compressed bitmap. */ -template -class EWAHBoolArrayIterator { +template class EWAHBoolArrayIterator { public: - /** - * is there a new word? - */ - bool hasNext() const { - return pointer < myparent.size(); + /** + * is there a new word? + */ + bool hasNext() const { return pointer < myparent.size(); } + + /** + * return next word. 
+ */ + uword next() { + uword returnvalue; + if (compressedwords < rl) { + ++compressedwords; + if (b) + returnvalue = notzero; + else + returnvalue = zero; + } else { + ++literalwords; + ++pointer; + returnvalue = myparent[pointer]; } - - /** - * return next word. - */ - uword next() { - uword returnvalue; - if (compressedwords < rl) { - ++compressedwords; - if (b) - returnvalue = notzero; - else - returnvalue = zero; - } else { -#ifdef EWAHASSERT - assert(literalwords < lw); -#endif - ++literalwords; - ++pointer; -#ifdef EWAHASSERT - assert(pointer < myparent.size()); -#endif - returnvalue = myparent[pointer]; - } - if ((compressedwords == rl) && (literalwords == lw)) { - ++pointer; - if (pointer < myparent.size()) - readNewRunningLengthWord(); - } - return returnvalue; + if ((compressedwords == rl) && (literalwords == lw)) { + ++pointer; + if (pointer < myparent.size()) + readNewRunningLengthWord(); } + return returnvalue; + } - EWAHBoolArrayIterator(const EWAHBoolArrayIterator & other) : - pointer(other.pointer), myparent(other.myparent), - compressedwords(other.compressedwords), - literalwords(other.literalwords), rl(other.rl), lw(other.lw), - b(other.b) { - } + EWAHBoolArrayIterator(const EWAHBoolArrayIterator &other) + : pointer(other.pointer), myparent(other.myparent), + compressedwords(other.compressedwords), + literalwords(other.literalwords), rl(other.rl), lw(other.lw), + b(other.b) {} + + static const uword zero = 0; + static const uword notzero = static_cast(~zero); - static const uword zero = 0; - static const uword notzero = static_cast (~zero); private: - EWAHBoolArrayIterator(const vector & parent); - void readNewRunningLengthWord(); - friend class EWAHBoolArray ; - size_t pointer; - const vector & myparent; - uword compressedwords; - uword literalwords; - uword rl, lw; - bool b; + EWAHBoolArrayIterator(const std::vector &parent); + void readNewRunningLengthWord(); + friend class EWAHBoolArray; + size_t pointer; + const std::vector &myparent; + 
uword compressedwords; + uword literalwords; + uword rl, lw; + bool b; }; /** * Used to go through the set bits. Not optimally fast, but convenient. */ -template -class EWAHBoolArraySetBitForwardIterator { +template class EWAHBoolArraySetBitForwardIterator { public: - enum { - wordinbits = sizeof(uword) * 8 - }; - typedef forward_iterator_tag iterator_category; - typedef size_t * pointer; - typedef size_t & reference_type; - typedef size_t value_type; - typedef ptrdiff_t difference_type; - typedef EWAHBoolArraySetBitForwardIterator type_of_iterator; - - /** - * Provides the location of the set bit. - */ - size_t operator*() const { - return currentrunoffset + offsetofpreviousrun; - } - - // this can be expensive - difference_type operator-(const type_of_iterator& o) { - type_of_iterator& smaller = *this < o ? *this : o; - type_of_iterator& bigger = *this >= o ? *this : o; - if (smaller.mpointer == smaller.buffer.size()) - return 0; - difference_type absdiff = static_cast (0); - EWAHBoolArraySetBitForwardIterator buf(smaller); - while (buf != bigger) { - ++absdiff; - ++buf; - } - if (*this < o) - return absdiff; - else - return -absdiff; - } - - bool operator<(const type_of_iterator& o) { - if (buffer != o.buffer) - return false; - if (mpointer == buffer.size()) - return false; - if (o.mpointer == o.buffer.size()) - return true; - if (offsetofpreviousrun < o.offsetofpreviousrun) - return true; - if (offsetofpreviousrun > o.offsetofpreviousrun) - return false; - if (currentrunoffset < o.currentrunoffset) - return true; - return false; - } - bool operator<=(const type_of_iterator& o) { - return ((*this) < o) || ((*this) == o); - } - - bool operator>(const type_of_iterator& o) { - return !((*this) <= o); - } - - bool operator>=(const type_of_iterator& o) { - return !((*this) < o); - } - - EWAHBoolArraySetBitForwardIterator & operator++() { - ++currentrunoffset; - advanceToNextSetBit(); - return *this; - } - EWAHBoolArraySetBitForwardIterator operator++(int) { - 
EWAHBoolArraySetBitForwardIterator old(*this); - ++currentrunoffset; - advanceToNextSetBit(); - return old; - } - bool operator==(const EWAHBoolArraySetBitForwardIterator & o) { - // if they are both over, return true - if ((mpointer == buffer.size()) && (o.mpointer == o.buffer.size())) - return true; - return (&buffer == &o.buffer) && (mpointer == o.mpointer) - && (offsetofpreviousrun == o.offsetofpreviousrun) - && (currentrunoffset == o.currentrunoffset); - } - bool operator!=(const EWAHBoolArraySetBitForwardIterator & o) { - // if they are both over, return false - if ((mpointer == buffer.size()) && (o.mpointer == o.buffer.size())) - return false; - return (&buffer != &o.buffer) || (mpointer != o.mpointer) - || (offsetofpreviousrun != o.offsetofpreviousrun) - || (currentrunoffset != o.currentrunoffset); + typedef std::forward_iterator_tag iterator_category; + typedef size_t *pointer; + typedef size_t &reference_type; + typedef size_t value_type; + typedef ptrdiff_t difference_type; + typedef EWAHBoolArraySetBitForwardIterator type_of_iterator; + /** + * Provides the location of the set bit. 
+ */ + inline size_t operator*() const { return answer; } + + bool operator<(const type_of_iterator &o) const { + if (!o.hasValue) + return true; + if (!hasValue) + return false; + return answer < o.answer; + } + + bool operator<=(const type_of_iterator &o) const { + if (!o.hasValue) + return true; + if (!hasValue) + return false; + return answer <= o.answer; + } + + bool operator>(const type_of_iterator &o) const { return !((*this) <= o); } + + bool operator>=(const type_of_iterator &o) const { return !((*this) < o); } + + EWAHBoolArraySetBitForwardIterator &operator++() { //++i + if (hasNext) + next(); + else + hasValue = false; + return *this; + } + + EWAHBoolArraySetBitForwardIterator operator++(int) { // i++ + EWAHBoolArraySetBitForwardIterator old(*this); + if (hasNext) + next(); + else + hasValue = false; + return old; + } + + bool operator==(const EWAHBoolArraySetBitForwardIterator &o) const { + if ((!hasValue) && (!o.hasValue)) + return true; + return (hasValue == o.hasValue) && (answer == o.answer); + } + + bool operator!=(const EWAHBoolArraySetBitForwardIterator &o) const { + return !(*this == o); + } + + static EWAHBoolArraySetBitForwardIterator &end() { + static EWAHBoolArraySetBitForwardIterator e; + return e; + } + + EWAHBoolArraySetBitForwardIterator(const std::vector *parent, + size_t startpointer = 0) + : word(0), position(0), runningLength(0), literalPosition(0), + wordPosition(startpointer), wordLength(0), buffer(parent), + hasNext(false), hasValue(false), answer(0) { + if (wordPosition < buffer->size()) { + setRunningLengthWord(); + hasNext = moveToNext(); + if (hasNext) { + next(); + hasValue = true; + } } + } - EWAHBoolArraySetBitForwardIterator( - const EWAHBoolArraySetBitForwardIterator & o) : - buffer(o.buffer), mpointer(o.mpointer), - offsetofpreviousrun(o.offsetofpreviousrun), - currentrunoffset(o.currentrunoffset), rlw(o.rlw) { - } + EWAHBoolArraySetBitForwardIterator() + : word(0), position(0), runningLength(0), literalPosition(0), + 
wordPosition(0), wordLength(0), buffer(NULL), hasNext(false), + hasValue(false), answer(0) {} -private: + inline bool runningHasNext() const { return position < runningLength; } - bool advanceToNextSetBit() { - if (mpointer == buffer.size()) - return false; - if (currentrunoffset < static_cast (rlw.getRunningLength() - * wordinbits)) { - if (rlw.getRunningBit()) - return true;// nothing to do - currentrunoffset = static_cast (rlw.getRunningLength() - * wordinbits);//skipping - } - while (true) { - const size_t - indexoflitword = - static_cast ((currentrunoffset - - rlw.getRunningLength() * wordinbits) - / wordinbits); - if (indexoflitword >= rlw.getNumberOfLiteralWords()) { - if (advanceToNextRun()) - return advanceToNextSetBit(); - else { - return false; - } - } - - if (usetrailingzeros) { - - const uint32_t tinwordpointer = - static_cast ((currentrunoffset - - rlw.getRunningLength() * wordinbits) - % wordinbits); - const uword modcurrentword = - static_cast (buffer[mpointer + 1 - + indexoflitword] >> tinwordpointer); - if (modcurrentword != 0) { - currentrunoffset - += static_cast (numberOfTrailingZeros( - modcurrentword)); - return true; - } else { - currentrunoffset += wordinbits - tinwordpointer; - } - } else { - const uword currentword = buffer[mpointer + 1 + indexoflitword]; - for (uint32_t inwordpointer = - static_cast ((currentrunoffset - - rlw.getRunningLength() * wordinbits) - % wordinbits); inwordpointer < wordinbits; ++inwordpointer, ++currentrunoffset) { - if ((currentword - & (static_cast (1) << inwordpointer)) != 0) - return true; - } - } - } + inline bool literalHasNext() { + while (word == 0 && wordPosition < wordLength) { + word = (*buffer)[wordPosition++]; + literalPosition = position; + position += WORD_IN_BITS; } + return word != 0; + } - enum { - usetrailingzeros = true - };// optimization option - - bool advanceToNextRun() { - offsetofpreviousrun += currentrunoffset; - currentrunoffset = 0; - mpointer += static_cast (1 + 
rlw.getNumberOfLiteralWords()); - if (mpointer < buffer.size()) { - rlw.mydata = buffer[mpointer]; - } else { - return false; - } - return true; + inline void setRunningLengthWord() { + uword rlw = (*buffer)[wordPosition]; + runningLength = + (size_t)WORD_IN_BITS * RunningLengthWord::getRunningLength(rlw) + + position; + if (!RunningLengthWord::getRunningBit(rlw)) { + position = runningLength; } + wordPosition++; // point to first literal word + wordLength = + wordPosition + RunningLengthWord::getNumberOfLiteralWords(rlw); + } - EWAHBoolArraySetBitForwardIterator(const vector & parent, - size_t startpointer = 0) : - buffer(parent), mpointer(startpointer), offsetofpreviousrun(0), - currentrunoffset(0), rlw(0) { - if (mpointer < buffer.size()) { - rlw.mydata = buffer[mpointer]; - advanceToNextSetBit(); - } + inline bool moveToNext() { + while (!runningHasNext() && !literalHasNext()) { + if (wordPosition >= buffer->size()) { + return false; + } + setRunningLengthWord(); } + return true; + } - const vector & buffer; - size_t mpointer; - size_t offsetofpreviousrun; - size_t currentrunoffset; - friend class EWAHBoolArray ; - ConstRunningLengthWord rlw; + void next() { // update answer + if (runningHasNext()) { + answer = position++; + if (runningHasNext()) + return; + } else { + uword t = word & (~word + 1); + answer = literalPosition + countOnes((uword)(t - 1)); + word ^= t; + } + hasNext = moveToNext(); + } + + enum { WORD_IN_BITS = sizeof(uword) * 8 }; + uword word; // lit word + size_t position; + size_t runningLength; + size_t literalPosition; + size_t wordPosition; // points to word in buffer + uword wordLength; + const std::vector *buffer; + bool hasNext; + bool hasValue; + size_t answer; }; /** @@ -776,1137 +950,1357 @@ class EWAHBoolArraySetBitForwardIterator { */ class BitmapStatistics { public: - BitmapStatistics() : - totalliteral(0), totalcompressed(0), runningwordmarker(0), - maximumofrunningcounterreached(0) { - } - size_t getCompressedSize() const { - 
return totalliteral + runningwordmarker; - } - size_t getUncompressedSize() const { - return totalliteral + totalcompressed; - } - size_t getNumberOfDirtyWords() const { - return totalliteral; - } - size_t getNumberOfCleanWords() const { - return totalcompressed; - } - size_t getNumberOfMarkers() const { - return runningwordmarker; - } - size_t getOverRuns() const { - return maximumofrunningcounterreached; - } - size_t totalliteral; - size_t totalcompressed; - size_t runningwordmarker; - size_t maximumofrunningcounterreached; + BitmapStatistics() + : totalliteral(0), totalcompressed(0), runningwordmarker(0), + maximumofrunningcounterreached(0) {} + size_t getCompressedSize() const { return totalliteral + runningwordmarker; } + size_t getUncompressedSize() const { return totalliteral + totalcompressed; } + size_t getNumberOfDirtyWords() const { return totalliteral; } + size_t getNumberOfCleanWords() const { return totalcompressed; } + size_t getNumberOfMarkers() const { return runningwordmarker; } + size_t getOverRuns() const { return maximumofrunningcounterreached; } + size_t totalliteral; + size_t totalcompressed; + size_t runningwordmarker; + size_t maximumofrunningcounterreached; }; -template -bool EWAHBoolArray::set(size_t i) { - if(i < sizeinbits) return false; - const size_t dist = (i + wordinbits) / wordinbits - (sizeinbits - + wordinbits - 1) / wordinbits; - sizeinbits = i + 1; - if (dist > 0) {// easy - if(dist>1) fastaddStreamOfEmptyWords(false, dist - 1); - addLiteralWord( - static_cast (static_cast (1) << (i % wordinbits))); - return true; - } - RunningLengthWord lastRunningLengthWord(buffer[lastRLW]); - if (lastRunningLengthWord.getNumberOfLiteralWords() == 0) { - lastRunningLengthWord.setRunningLength( - static_cast (lastRunningLengthWord.getRunningLength() - - 1)); - addLiteralWord( - static_cast (static_cast (1) << (i % wordinbits))); - return true; - } - buffer[buffer.size() - 1] |= static_cast (static_cast (1) - << (i % wordinbits)); - // check if 
we just completed a stream of 1s - if (buffer[buffer.size() - 1] == static_cast (~0)) { - // we remove the last dirty word - buffer[buffer.size() - 1] = 0; - buffer.resize(buffer.size() - 1); - lastRunningLengthWord.setNumberOfLiteralWords( - static_cast (lastRunningLengthWord.getNumberOfLiteralWords() - - 1)); - // next we add one clean word - addEmptyWord(true); - } +template bool EWAHBoolArray::set(size_t i) { + if (i < sizeinbits) + return false; + const size_t dist = (i + wordinbits) / wordinbits - + (sizeinbits + wordinbits - 1) / wordinbits; + sizeinbits = i + 1; + if (dist > 0) { // easy + if (dist > 1) + fastaddStreamOfEmptyWords(false, dist - 1); + addLiteralWord( + static_cast(static_cast(1) << (i % wordinbits))); + return true; + } + RunningLengthWord lastRunningLengthWord(buffer[lastRLW]); + if (lastRunningLengthWord.getNumberOfLiteralWords() == 0) { + lastRunningLengthWord.setRunningLength( + static_cast(lastRunningLengthWord.getRunningLength() - 1)); + addLiteralWord( + static_cast(static_cast(1) << (i % wordinbits))); return true; + } + buffer[buffer.size() - 1] |= + static_cast(static_cast(1) << (i % wordinbits)); + // check if we just completed a stream of 1s + if (buffer[buffer.size() - 1] == static_cast(~0)) { + // we remove the last dirty word + buffer[buffer.size() - 1] = 0; + buffer.resize(buffer.size() - 1); + lastRunningLengthWord.setNumberOfLiteralWords(static_cast( + lastRunningLengthWord.getNumberOfLiteralWords() - 1)); + // next we add one clean word + addEmptyWord(true); + } + return true; } -template -void EWAHBoolArray::inplace_logicalnot() { - size_t pointer(0), lastrlw(0); - while (pointer < buffer.size()) { - RunningLengthWord rlw(buffer[pointer]); - lastrlw = pointer;// we save this up - if (rlw.getRunningBit()) - rlw.setRunningBit(false); - else - rlw.setRunningBit(true); - ++pointer; - for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) { - buffer[pointer] = static_cast(~buffer[pointer]); - ++pointer; - } - } - 
if(sizeinbits % wordinbits != 0){ - RunningLengthWord rlw(buffer[lastrlw]); -#ifdef EWAHASSERT - assert(rlw.getNumberOfLiteralWords() + rlw.getRunningLength() > 0); -#endif - const uword maskbogus = (static_cast(1) << (sizeinbits % wordinbits)) - 1; - if(rlw.getNumberOfLiteralWords()>0) {// easy case - buffer[lastrlw + 1 + rlw.getNumberOfLiteralWords() - 1 ] &= maskbogus; - } else if(rlw.getRunningBit()) { -#ifdef EWAHASSERT - assert(rlw.getNumberOfLiteralWords() > 0); -#endif - rlw.setNumberOfLiteralWords(rlw.getNumberOfLiteralWords() - 1); - addLiteralWord(maskbogus); - } +template void EWAHBoolArray::inplace_logicalnot() { + size_t pointer(0), lastrlw(0); + while (pointer < buffer.size()) { + RunningLengthWord rlw(buffer[pointer]); + lastrlw = pointer; // we save this up + if (rlw.getRunningBit()) + rlw.setRunningBit(false); + else + rlw.setRunningBit(true); + ++pointer; + for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) { + buffer[pointer] = static_cast(~buffer[pointer]); + ++pointer; + } + } + if (sizeinbits % wordinbits != 0) { + RunningLengthWord rlw(buffer[lastrlw]); + const uword maskbogus = + (static_cast(1) << (sizeinbits % wordinbits)) - 1; + if (rlw.getNumberOfLiteralWords() > 0) { // easy case + buffer[lastrlw + 1 + rlw.getNumberOfLiteralWords() - 1] &= maskbogus; + } else { + rlw.setRunningLength(rlw.getRunningLength() - 1); + addLiteralWord(maskbogus); } + } } -template -size_t EWAHBoolArray::numberOfOnes() const { - size_t tot(0); - size_t pointer(0); - while (pointer < buffer.size()) { - ConstRunningLengthWord rlw(buffer[pointer]); - if (rlw.getRunningBit()) { - tot += static_cast(rlw.getRunningLength() * wordinbits); - } - ++pointer; - for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) { -#ifdef EWAHASSERT - assert(countOnes(buffer[pointer]) < 64); -#endif - tot += countOnes(buffer[pointer]); - ++pointer; - } - } - return tot; +template size_t EWAHBoolArray::numberOfOnes() const { + size_t tot(0); + size_t pointer(0); + while 
(pointer < buffer.size()) { + ConstRunningLengthWord rlw(buffer[pointer]); + if (rlw.getRunningBit()) { + tot += static_cast(rlw.getRunningLength() * wordinbits); + } + ++pointer; + for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) { + tot += countOnes((uword)buffer[pointer]); + ++pointer; + } + } + return tot; } -template -vector EWAHBoolArray::toArray() const { - vector < size_t > ans; - size_t pos(0); - size_t pointer(0); - while (pointer < buffer.size()) { - ConstRunningLengthWord rlw(buffer[pointer]); - if (rlw.getRunningBit()) { - for (size_t k = 0; k < rlw.getRunningLength() * wordinbits; ++k, ++pos) { - ans.push_back(pos); - } - } else { - pos += static_cast(rlw.getRunningLength() * wordinbits); - } - ++pointer; - const bool usetrailing = true; //optimization - for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) { - if (usetrailing) { - uword myword = buffer[pointer]; - while (myword != 0) { - uint32_t ntz = numberOfTrailingZeros (myword); - ans.push_back(pos + ntz); - myword ^= (static_cast(1) << ntz); - } - pos += wordinbits; - } else { - for (int c = 0; c < wordinbits; ++c, ++pos) - if ((buffer[pointer] & (static_cast (1) << c)) != 0) { - ans.push_back(pos); - } - } - ++pointer; - } +template +std::vector EWAHBoolArray::toArray() const { + std::vector ans; + size_t pos(0); + size_t pointer(0); + const size_t buffersize = buffer.size(); + while (pointer < buffersize) { + ConstRunningLengthWord rlw(buffer[pointer]); + const size_t productofrl = + static_cast(rlw.getRunningLength() * wordinbits); + if (rlw.getRunningBit()) { + size_t upper_limit = pos + productofrl; + for (; pos < upper_limit; ++pos) { + ans.push_back(pos); + } + } else { + pos += productofrl; + } + ++pointer; + const size_t rlwlw = rlw.getNumberOfLiteralWords(); + for (size_t k = 0; k < rlwlw; ++k) { + uword myword = buffer[pointer]; + while (myword != 0) { + uint64_t t = myword & (~myword + 1); + uint32_t r = numberOfTrailingZeros(t); + ans.push_back(pos + r); + myword ^= 
t; + } + pos += wordinbits; + ++pointer; } - return ans; + } + return ans; } -template -void EWAHBoolArray::logicalnot(EWAHBoolArray & x) const { - x.reset(); - x.buffer.reserve(buffer.size()); - EWAHBoolArrayRawIterator i = this->raw_iterator(); - if(!i.hasNext()) return;// nothing to do - while (true) { - BufferedRunningLengthWord & rlw = i.next(); - if (i.hasNext()) { - if( rlw.getRunningLength()>0) x.fastaddStreamOfEmptyWords(!rlw.getRunningBit(), - rlw.getRunningLength()); - if (rlw.getNumberOfLiteralWords() > 0) { - const uword * dw = i.dirtyWords(); - for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) { - x.addLiteralWord(~dw[k]); - } - } +template +void EWAHBoolArray::logicalnot(EWAHBoolArray &x) const { + x.reset(); + x.buffer.reserve(buffer.size()); + EWAHBoolArrayRawIterator i = this->raw_iterator(); + if (!i.hasNext()) + return; // nothing to do + while (true) { + BufferedRunningLengthWord &rlw = i.next(); + if (i.hasNext()) { + if (rlw.getRunningLength() > 0) + x.fastaddStreamOfEmptyWords(!rlw.getRunningBit(), + rlw.getRunningLength()); + if (rlw.getNumberOfLiteralWords() > 0) { + const uword *dw = i.dirtyWords(); + for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) { + x.addLiteralWord(~dw[k]); + } + } + } else { + if (rlw.getNumberOfLiteralWords() == 0) { + if ((this->sizeinbits % wordinbits != 0) && !rlw.getRunningBit()) { + if (rlw.getRunningLength() > 1) + x.fastaddStreamOfEmptyWords(!rlw.getRunningBit(), + rlw.getRunningLength() - 1); + const uword maskbogus = + (static_cast(1) << (this->sizeinbits % wordinbits)) - 1; + x.addLiteralWord(maskbogus); + break; } else { -#ifdef EWAHASSERT - assert(rlw.getNumberOfLiteralWords() + rlw.getRunningLength() > 0); -#endif - if(rlw.getNumberOfLiteralWords() == 0) { - if((this->sizeinbits % wordinbits != 0) && !rlw.getRunningBit()) { - if(rlw.getRunningLength()>1) x.fastaddStreamOfEmptyWords(!rlw.getRunningBit(), - rlw.getRunningLength() - 1); - const uword maskbogus = (static_cast(1) << 
(this->sizeinbits % wordinbits)) - 1; - x.addLiteralWord(maskbogus); - break; - } else { - if(rlw.getRunningLength()>0) x.fastaddStreamOfEmptyWords(!rlw.getRunningBit(), - rlw.getRunningLength()); - break; - } - } - if(rlw.getRunningLength()>0) x.fastaddStreamOfEmptyWords(!rlw.getRunningBit(), - rlw.getRunningLength()); - const uword * dw = i.dirtyWords(); - for (size_t k = 0; k + 1 < rlw.getNumberOfLiteralWords() ; ++k) { - x.addLiteralWord(~dw[k]); - } - const uword maskbogus = (this->sizeinbits % wordinbits != 0) ? (static_cast(1) << (this->sizeinbits % wordinbits)) - 1 : ~static_cast(0); - x.addLiteralWord((~dw[rlw.getNumberOfLiteralWords() - 1]) & maskbogus); - break; + if (rlw.getRunningLength() > 0) + x.fastaddStreamOfEmptyWords(!rlw.getRunningBit(), + rlw.getRunningLength()); + break; } - } - x.sizeinbits = this->sizeinbits; + } + if (rlw.getRunningLength() > 0) + x.fastaddStreamOfEmptyWords(!rlw.getRunningBit(), + rlw.getRunningLength()); + const uword *dw = i.dirtyWords(); + for (size_t k = 0; k + 1 < rlw.getNumberOfLiteralWords(); ++k) { + x.addLiteralWord(~dw[k]); + } + const uword maskbogus = + (this->sizeinbits % wordinbits != 0) + ? 
(static_cast(1) << (this->sizeinbits % wordinbits)) - 1 + : ~static_cast(0); + x.addLiteralWord((~dw[rlw.getNumberOfLiteralWords() - 1]) & maskbogus); + break; + } + } + x.sizeinbits = this->sizeinbits; } -template +template size_t EWAHBoolArray::addWord(const uword newdata, - const uint32_t bitsthatmatter) { - sizeinbits += bitsthatmatter; - if (newdata == 0) { - return addEmptyWord(0); - } else if (newdata == static_cast (~0)) { - return addEmptyWord(1); - } else { - return addLiteralWord(newdata); - } + const uint32_t bitsthatmatter) { + sizeinbits += bitsthatmatter; + if (newdata == 0) { + return addEmptyWord(0); + } else if (newdata == static_cast(~0)) { + return addEmptyWord(1); + } else { + return addLiteralWord(newdata); + } } -template -inline void EWAHBoolArray::writeBuffer(ostream & out) const { - if (!buffer.empty()) - out.write(reinterpret_cast (&buffer[0]), - sizeof(uword) * buffer.size()); +template +inline void EWAHBoolArray::writeBuffer(std::ostream &out) const { + if (!buffer.empty()) + out.write(reinterpret_cast(&buffer[0]), + sizeof(uword) * buffer.size()); } -template -inline void EWAHBoolArray::readBuffer(istream & in, - const size_t buffersize) { - buffer.resize(buffersize); - if (buffersize > 0) - in.read(reinterpret_cast (&buffer[0]), - sizeof(uword) * buffersize); +template +inline void EWAHBoolArray::readBuffer(std::istream &in, + const size_t buffersize) { + buffer.resize(buffersize); + if (buffersize > 0) + in.read(reinterpret_cast(&buffer[0]), sizeof(uword) * buffersize); } -template -void EWAHBoolArray::write(ostream & out, const bool savesizeinbits) const { - if (savesizeinbits) - out.write(reinterpret_cast (&sizeinbits), - sizeof(sizeinbits)); - const size_t buffersize = buffer.size(); - out.write(reinterpret_cast (&buffersize), sizeof(buffersize)); - if (buffersize > 0) - out.write(reinterpret_cast (&buffer[0]), - static_cast (sizeof(uword) * buffersize)); +template +size_t EWAHBoolArray::write(std::ostream &out, + const bool 
savesizeinbits) const { + size_t written = 0; + if (savesizeinbits) { + out.write(reinterpret_cast(&sizeinbits), sizeof(sizeinbits)); + written += sizeof(sizeinbits); + } + const size_t buffersize = buffer.size(); + out.write(reinterpret_cast(&buffersize), sizeof(buffersize)); + written += sizeof(buffersize); + + if (buffersize > 0) { + out.write(reinterpret_cast(&buffer[0]), + static_cast(sizeof(uword) * buffersize)); + written += sizeof(uword) * buffersize; + } + return written; } -template -void EWAHBoolArray::read(istream & in, const bool savesizeinbits) { - if (savesizeinbits) - in.read(reinterpret_cast (&sizeinbits), sizeof(sizeinbits)); - else - sizeinbits = 0; - size_t buffersize(0); - in.read(reinterpret_cast (&buffersize), sizeof(buffersize)); - buffer.resize(buffersize); - if (buffersize > 0) - in.read(reinterpret_cast (&buffer[0]), - static_cast (sizeof(uword) * buffersize)); +template +size_t EWAHBoolArray::write(char * out, size_t capacity, + const bool savesizeinbits) const { + size_t written = 0; + if (savesizeinbits) { + if(capacity < sizeof(sizeinbits)) return 0; + capacity -= sizeof(sizeinbits); + memcpy(out, &sizeinbits, sizeof(sizeinbits)); + out += sizeof(sizeinbits); + written += sizeof(sizeinbits); + } + const size_t buffersize = buffer.size(); + if(capacity < sizeof(buffersize)) return 0; + capacity -= sizeof(buffersize); + memcpy(out, &buffersize, sizeof(buffersize)); + out += sizeof(buffersize); + written += sizeof(buffersize); + + if (buffersize > 0) { + if(capacity < sizeof(uword) * buffersize) return 0; + memcpy(out, &buffer[0], sizeof(uword) * buffersize); + written += sizeof(uword) * buffersize; + } + return written; } -template + +template +size_t EWAHBoolArray::read(std::istream &in, const bool savesizeinbits) { + size_t read = 0; + if (savesizeinbits) { + in.read(reinterpret_cast(&sizeinbits), sizeof(sizeinbits)); + read += sizeof(sizeinbits); + } else { + sizeinbits = 0; + } + size_t buffersize(0); + 
in.read(reinterpret_cast(&buffersize), sizeof(buffersize)); + read += sizeof(buffersize); + buffer.resize(buffersize); + if (buffersize > 0) { + in.read(reinterpret_cast(&buffer[0]), + static_cast(sizeof(uword) * buffersize)); + read += sizeof(uword) * buffersize; + } + return read; +} + + +template +size_t EWAHBoolArray::read(const char * in, size_t capacity, const bool savesizeinbits) { + size_t read = 0; + if (savesizeinbits) { + if(capacity < sizeof(sizeinbits)) return 0; + capacity -= sizeof(sizeinbits); + memcpy(reinterpret_cast(&sizeinbits), in, sizeof(sizeinbits)); + read += sizeof(sizeinbits); + in += sizeof(sizeinbits); + } else { + sizeinbits = 0; + } + size_t buffersize(0); + if(capacity < sizeof(buffersize)) return 0; + capacity -= sizeof(buffersize); + memcpy(reinterpret_cast(&buffersize), in, sizeof(buffersize)); + in += sizeof(buffersize); + read += sizeof(buffersize); + + buffer.resize(buffersize); + if (buffersize > 0) { + if(capacity < sizeof(uword) * buffersize) return 0; + memcpy(&buffer[0], in, sizeof(uword) * buffersize); + read += sizeof(uword) * buffersize; + } + return read; +} + +template size_t EWAHBoolArray::addLiteralWord(const uword newdata) { - RunningLengthWord lastRunningLengthWord(buffer[lastRLW]); - uword numbersofar = lastRunningLengthWord.getNumberOfLiteralWords(); - if (numbersofar >= RunningLengthWord::largestliteralcount) {//0x7FFF) { - buffer.push_back(0); - lastRLW = buffer.size() - 1; - RunningLengthWord lastRunningLengthWord2(buffer[lastRLW]); - lastRunningLengthWord2.setNumberOfLiteralWords(1); - buffer.push_back(newdata); - return 2; - } - lastRunningLengthWord.setNumberOfLiteralWords( - static_cast (numbersofar + 1)); -#ifdef EWAHASSERT - assert(lastRunningLengthWord.getNumberOfLiteralWords() == numbersofar + 1); -#endif + RunningLengthWord lastRunningLengthWord(buffer[lastRLW]); + uword numbersofar = lastRunningLengthWord.getNumberOfLiteralWords(); + if (numbersofar >= + RunningLengthWord::largestliteralcount) { // 
0x7FFF) { + buffer.push_back(0); + lastRLW = buffer.size() - 1; + RunningLengthWord lastRunningLengthWord2(buffer[lastRLW]); + lastRunningLengthWord2.setNumberOfLiteralWords(1); buffer.push_back(newdata); - return 1; + return 2; + } + lastRunningLengthWord.setNumberOfLiteralWords( + static_cast(numbersofar + 1)); + buffer.push_back(newdata); + return 1; } -template +template size_t EWAHBoolArray::padWithZeroes(const size_t totalbits) { - size_t wordsadded = 0; - if ( totalbits <= sizeinbits ) - return wordsadded; - - size_t missingbits = totalbits - sizeinbits; - - - RunningLengthWord rlw( buffer[lastRLW] ); - if ( rlw.getNumberOfLiteralWords() > 0 ) - { - // Consume trailing zeroes of trailing literal word (past sizeinbits) - size_t remain = sizeinbits % wordinbits; - if ( remain > 0 ) // Is last word partial? - { - size_t avail = wordinbits - remain; - if ( avail > 0 ) - { - if ( missingbits > avail ) { - missingbits -= avail; - } else { - missingbits = 0; - } - sizeinbits += avail; - } - } - } - - if ( missingbits > 0 ) - { - size_t wordstoadd = missingbits / wordinbits; - if ( (missingbits % wordinbits) != 0) - ++wordstoadd; - - wordsadded = addStreamOfEmptyWords( false, wordstoadd ); - } -#ifdef EWAHASSERT - assert(sizeinbits >= totalbits); - assert(sizeinbits <= totalbits + wordinbits); -#endif - sizeinbits = totalbits; + size_t wordsadded = 0; + if (totalbits <= sizeinbits) return wordsadded; + + size_t missingbits = totalbits - sizeinbits; + + RunningLengthWord rlw(buffer[lastRLW]); + if (rlw.getNumberOfLiteralWords() > 0) { + // Consume trailing zeroes of trailing literal word (past sizeinbits) + size_t remain = sizeinbits % wordinbits; + if (remain > 0) // Is last word partial? 
+ { + size_t avail = wordinbits - remain; + if (avail > 0) { + if (missingbits > avail) { + missingbits -= avail; + } else { + missingbits = 0; + } + sizeinbits += avail; + } + } + } + + if (missingbits > 0) { + size_t wordstoadd = missingbits / wordinbits; + if ((missingbits % wordinbits) != 0) + ++wordstoadd; + + wordsadded = addStreamOfEmptyWords(false, wordstoadd); + } + sizeinbits = totalbits; + return wordsadded; } /** * This is a low-level iterator. */ -template -class EWAHBoolArrayRawIterator { +template class EWAHBoolArrayRawIterator { public: - - EWAHBoolArrayRawIterator(const EWAHBoolArray & p) : - pointer(0), myparent(&p.getBuffer()), rlw((*myparent)[pointer], this) { - } - EWAHBoolArrayRawIterator(const EWAHBoolArrayRawIterator & o) : - pointer(o.pointer), myparent(o.myparent), rlw(o.rlw) { - } - - bool hasNext() const { - return pointer < myparent->size(); - } - - BufferedRunningLengthWord & next() { -#ifdef EWAHASSERT - assert(pointer < myparent->size()); -#endif - rlw.read((*myparent)[pointer]); - pointer = static_cast (pointer + rlw.getNumberOfLiteralWords() - + 1); - return rlw; - } - - const uword * dirtyWords() const { -#ifdef EWAHASSERT - assert(pointer > 0); - assert(pointer >= rlw.getNumberOfLiteralWords()); -#endif - return myparent->data() + - static_cast (pointer - rlw.getNumberOfLiteralWords()); - } - - EWAHBoolArrayRawIterator & operator=(const EWAHBoolArrayRawIterator & other) { - pointer = other.pointer; - myparent = other.myparent; - rlw = other.rlw; - return *this; - } - - size_t pointer; - const vector * myparent; - BufferedRunningLengthWord rlw; - - EWAHBoolArrayRawIterator(); + EWAHBoolArrayRawIterator(const EWAHBoolArray &p) + : pointer(0), myparent(&p.getBuffer()), rlw((*myparent)[pointer], this) {} + EWAHBoolArrayRawIterator(const EWAHBoolArrayRawIterator &o) + : pointer(o.pointer), myparent(o.myparent), rlw(o.rlw) {} + + bool hasNext() const { return pointer < myparent->size(); } + + BufferedRunningLengthWord &next() { + 
rlw.read((*myparent)[pointer]); + pointer = static_cast(pointer + rlw.getNumberOfLiteralWords() + 1); + return rlw; + } + + const uword *dirtyWords() const { + return myparent->data() + + static_cast(pointer - rlw.getNumberOfLiteralWords()); + } + + EWAHBoolArrayRawIterator &operator=(const EWAHBoolArrayRawIterator &other) { + pointer = other.pointer; + myparent = other.myparent; + rlw = other.rlw; + return *this; + } + + size_t pointer; + const std::vector *myparent; + BufferedRunningLengthWord rlw; + + EWAHBoolArrayRawIterator(); }; -template +template EWAHBoolArrayIterator EWAHBoolArray::uncompress() const { - return EWAHBoolArrayIterator (buffer); + return EWAHBoolArrayIterator(buffer); } -template +template EWAHBoolArrayRawIterator EWAHBoolArray::raw_iterator() const { - return EWAHBoolArrayRawIterator (*this); -} - - -#ifndef ALTEQUAL - -template -bool EWAHBoolArray::operator==(const EWAHBoolArray & a) const { - EWAHBoolArrayRawIterator i = a.raw_iterator(); - EWAHBoolArrayRawIterator j = raw_iterator(); - if (!(i.hasNext() and j.hasNext())) {// hopefully this never happens... - return true; - } - // at this point, this should be safe: - BufferedRunningLengthWord & rlwi = i.next(); - BufferedRunningLengthWord & rlwj = j.next(); - //RunningLength; - while (true) { - bool i_is_prey(rlwi.size() < rlwj.size()); - BufferedRunningLengthWord & prey(i_is_prey ? rlwi : rlwj); - BufferedRunningLengthWord & predator(i_is_prey ? 
rlwj : rlwi); - uword predatorrl(predator.getRunningLength()); - const uword preyrl(prey.getRunningLength()); - if (predatorrl >= preyrl) { - const uword tobediscarded = preyrl; - if(tobediscarded) - if(prey.getRunningBit() ^ predator.getRunningBit()) - return false; - } else { - const uword tobediscarded = predatorrl; - if(predatorrl>0) { - if(prey.getRunningBit() ^ predator.getRunningBit()) - return false; - } - if (preyrl - tobediscarded > 0) { - return false; - } - } - predator.discardFirstWords(preyrl); - prey.discardFirstWords(preyrl); - - predatorrl = predator.getRunningLength(); - if (predatorrl > 0) { - - const uword nbre_dirty_prey(prey.getNumberOfLiteralWords()); - const uword tobediscarded = - (predatorrl >= nbre_dirty_prey) ? nbre_dirty_prey - : predatorrl; - if (tobediscarded > 0) { - return false; - } - } - // all that is left to do now is to AND the dirty words - uword nbre_dirty_prey(prey.getNumberOfLiteralWords()); - if (nbre_dirty_prey > 0) { - const uword * idirty = i.dirtyWords(); - const uword * jdirty = j.dirtyWords(); - - for (uword k = 0; k < nbre_dirty_prey; ++k) { - if((idirty[k] ^ jdirty[k])!=0) return false; - } - predator.discardFirstWords(nbre_dirty_prey); - } - if (i_is_prey) { - if (!i.hasNext()) - break; - rlwi = i.next(); - } else { - if (!j.hasNext()) - break; - rlwj = j.next(); - } - } - return true; + return EWAHBoolArrayRawIterator(*this); } -#else - -template -bool EWAHBoolArray::operator==(const EWAHBoolArray & x) const { - EWAHBoolArrayRawIterator i = x.raw_iterator(); - EWAHBoolArrayRawIterator j = raw_iterator(); - if (!(i.hasNext() and j.hasNext())) {// hopefully this never happens... 
- return true; - } - // at this point, this should be safe: - BufferedRunningLengthWord & rlwi = i.next(); - BufferedRunningLengthWord & rlwj = j.next(); - - - while ((rlwi.size() > 0) && (rlwj.size() > 0)) { - while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { - const bool i_is_prey = rlwi.getRunningLength() < rlwj .getRunningLength(); - BufferedRunningLengthWord & prey = i_is_prey ? rlwi : rlwj; - BufferedRunningLengthWord & predator = i_is_prey ? rlwj : rlwi; - size_t index = 0; - const bool nonzero = ((!predator.getRunningBit()) ? prey.nonzero_discharge( - predator.getRunningLength(),index) : prey.nonzero_dischargeNegated( - predator.getRunningLength(),index)); - if(nonzero) { - return false; - } - if(predator.getRunningLength() - index > 0) { - if(predator.getRunningBit()) { - return false; - } - } - predator.discardRunningWordsWithReload(); - - } - const size_t nbre_literal = min(rlwi.getNumberOfLiteralWords(),rlwj.getNumberOfLiteralWords()); - if (nbre_literal > 0) { - for (size_t k = 0; k < nbre_literal; ++k) - if((rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k))!=0) return false; - rlwi.discardFirstWordsWithReload(nbre_literal); - rlwj.discardFirstWordsWithReload(nbre_literal); +template +bool EWAHBoolArray::operator==(const EWAHBoolArray &x) const { + EWAHBoolArrayRawIterator i = x.raw_iterator(); + EWAHBoolArrayRawIterator j = raw_iterator(); + if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens... + return (i.hasNext() == false) && (j.hasNext() == false); + } + // at this point, this should be safe: + BufferedRunningLengthWord &rlwi = i.next(); + BufferedRunningLengthWord &rlwj = j.next(); + + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); + BufferedRunningLengthWord &prey = i_is_prey ? rlwi : rlwj; + BufferedRunningLengthWord &predator = i_is_prey ? 
rlwj : rlwi; + size_t index = 0; + const bool nonzero = + ((!predator.getRunningBit()) + ? prey.nonzero_discharge(predator.getRunningLength(), index) + : prey.nonzero_dischargeNegated(predator.getRunningLength(), + index)); + if (nonzero) { + return false; + } + if (predator.getRunningLength() - index > 0) { + if (predator.getRunningBit()) { + return false; } + } + predator.discardRunningWordsWithReload(); } - const bool i_remains = rlwi.size() > 0; - BufferedRunningLengthWord & remaining = i_remains ? rlwi : rlwj; - return !remaining.nonzero_discharge(); + const size_t nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (size_t k = 0; k < nbre_literal; ++k) + if ((rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)) != 0) + return false; + rlwi.discardLiteralWordsWithReload(nbre_literal); + rlwj.discardLiteralWordsWithReload(nbre_literal); + } + } + const bool i_remains = rlwi.size() > 0; + BufferedRunningLengthWord &remaining = i_remains ? rlwi : rlwj; + return !remaining.nonzero_discharge(); } -#endif - -template -void EWAHBoolArray::swap(EWAHBoolArray & x) { - buffer.swap(x.buffer); - size_t tmp = x.sizeinbits; - x.sizeinbits = sizeinbits; - sizeinbits = tmp; - tmp = x.lastRLW; - x.lastRLW = lastRLW; - lastRLW = tmp; +template void EWAHBoolArray::swap(EWAHBoolArray &x) { + buffer.swap(x.buffer); + size_t tmp = x.sizeinbits; + x.sizeinbits = sizeinbits; + sizeinbits = tmp; + tmp = x.lastRLW; + x.lastRLW = lastRLW; + lastRLW = tmp; } -template -void EWAHBoolArray::append(const EWAHBoolArray & x) { - if (sizeinbits % wordinbits == 0) { - // hoping for the best? - sizeinbits += x.sizeinbits; - ConstRunningLengthWord lRLW(buffer[lastRLW]); - if ((lRLW.getRunningLength() == 0) && (lRLW.getNumberOfLiteralWords() - == 0)) { - // it could be that the running length word is empty, in such a case, - // we want to get rid of it! 
-#ifdef EWAHASSERT - assert(lastRLW == buffer.size() - 1); -#endif - lastRLW = x.lastRLW + buffer.size() - 1; - buffer.resize(buffer.size() - 1); - buffer.insert(buffer.end(), x.buffer.begin(), x.buffer.end()); - } else { - lastRLW = x.lastRLW + buffer.size(); - buffer.insert(buffer.end(), x.buffer.begin(), x.buffer.end()); - } +template +void EWAHBoolArray::append(const EWAHBoolArray &x) { + if (sizeinbits % wordinbits == 0) { + // hoping for the best? + sizeinbits += x.sizeinbits; + ConstRunningLengthWord lRLW(buffer[lastRLW]); + if ((lRLW.getRunningLength() == 0) && + (lRLW.getNumberOfLiteralWords() == 0)) { + // it could be that the running length word is empty, in such a case, + // we want to get rid of it! + lastRLW = x.lastRLW + buffer.size() - 1; + buffer.resize(buffer.size() - 1); + buffer.insert(buffer.end(), x.buffer.begin(), x.buffer.end()); } else { - stringstream ss; - ss - << "This should really not happen! You are trying to append to a bitmap having a fractional number of words, that is, " - << static_cast (sizeinbits) - << " bits with a word size in bits of " - << static_cast (wordinbits) << ". "; - ss << "Size of the bitmap being appended: " << x.sizeinbits << " bits." - << endl; - throw invalid_argument(ss.str()); - } + lastRLW = x.lastRLW + buffer.size(); + buffer.insert(buffer.end(), x.buffer.begin(), x.buffer.end()); + } + } else { + std::stringstream ss; + ss << "This should really not happen! You are trying to append to a bitmap " + "having a fractional number of words, that is, " + << static_cast(sizeinbits) << " bits with a word size in bits of " + << static_cast(wordinbits) << ". "; + ss << "Size of the bitmap being appended: " << x.sizeinbits << " bits." 
+ << std::endl; + throw std::invalid_argument(ss.str()); + } } -template +template EWAHBoolArrayIterator::EWAHBoolArrayIterator( - const vector & parent) : - pointer(0), myparent(parent), compressedwords(0), literalwords(0), rl(0), - lw(0), b(0) { - if (pointer < myparent.size()) - readNewRunningLengthWord(); + const std::vector &parent) + : pointer(0), myparent(parent), compressedwords(0), literalwords(0), rl(0), + lw(0), b(0) { + if (pointer < myparent.size()) + readNewRunningLengthWord(); } -template +template void EWAHBoolArrayIterator::readNewRunningLengthWord() { - literalwords = 0; - compressedwords = 0; - ConstRunningLengthWord rlw(myparent[pointer]); - rl = rlw.getRunningLength(); - lw = rlw.getNumberOfLiteralWords(); - b = rlw.getRunningBit(); - if ((rl == 0) && (lw == 0)) { - if (pointer < myparent.size() - 1) { - ++pointer; - readNewRunningLengthWord(); - } else { -#ifdef EWAHASSERT - assert(pointer >= myparent.size() - 1); -#endif - pointer = myparent.size(); -#ifdef EWAHASSERT - assert(!hasNext()); -#endif - } + literalwords = 0; + compressedwords = 0; + ConstRunningLengthWord rlw(myparent[pointer]); + rl = rlw.getRunningLength(); + lw = rlw.getNumberOfLiteralWords(); + b = rlw.getRunningBit(); + if ((rl == 0) && (lw == 0)) { + if (pointer < myparent.size() - 1) { + ++pointer; + readNewRunningLengthWord(); + } else { + pointer = myparent.size(); } + } } -template +template BoolArray EWAHBoolArray::toBoolArray() const { - BoolArray ans(sizeinbits); - EWAHBoolArrayIterator i = uncompress(); - size_t counter = 0; - while (i.hasNext()) { - ans.setWord(counter++, i.next()); - } - return ans; + BoolArray ans(sizeinbits); + EWAHBoolArrayIterator i = uncompress(); + size_t counter = 0; + while (i.hasNext()) { + ans.setWord(counter++, i.next()); + } + return ans; } -template -template -void EWAHBoolArray::appendRowIDs(container & out, const size_t offset) const { - size_t pointer(0); - size_t currentoffset(offset); - if (RESERVEMEMORY) - 
out.reserve(buffer.size() + 64);// trading memory for speed. - while (pointer < buffer.size()) { - ConstRunningLengthWord rlw(buffer[pointer]); - if (rlw.getRunningBit()) { - for (size_t x = 0; x < static_cast (rlw.getRunningLength() - * wordinbits); ++x) { - out.push_back(currentoffset + x); - } - } - currentoffset = static_cast (currentoffset - + rlw.getRunningLength() * wordinbits); - ++pointer; - for (uword k = 0; k < rlw.getNumberOfLiteralWords(); ++k) { - const uword currentword = buffer[pointer]; - for (uint32_t kk = 0; kk < wordinbits; ++kk) { - if ((currentword & static_cast (static_cast (1) - << kk)) != 0) - out.push_back(currentoffset + kk); - } - currentoffset += wordinbits; - ++pointer; - } +template +template +void EWAHBoolArray::appendRowIDs(container &out, + const size_t offset) const { + size_t pointer(0); + size_t currentoffset(offset); + if (RESERVEMEMORY) + out.reserve(buffer.size() + 64); // trading memory for speed. + const size_t buffersize = buffer.size(); + while (pointer < buffersize) { + ConstRunningLengthWord rlw(buffer[pointer]); + const size_t productofrl = + static_cast(rlw.getRunningLength() * wordinbits); + if (rlw.getRunningBit()) { + const size_t upper_limit = currentoffset + productofrl; + for (; currentoffset < upper_limit; ++currentoffset) { + out.push_back(currentoffset); + } + } else { + currentoffset += productofrl; + } + ++pointer; + const size_t rlwlw = rlw.getNumberOfLiteralWords(); + for (uword k = 0; k < rlwlw; ++k) { + uword currentword = buffer[pointer]; + while (currentword != 0) { + uint64_t t = currentword & -currentword; + uint32_t r = numberOfTrailingZeros(t); + out.push_back(currentoffset + r); + currentword ^= t; + } + currentoffset += wordinbits; + ++pointer; } + } } -template -bool EWAHBoolArray::operator!=(const EWAHBoolArray & x) const { - return !(*this == x); +template +bool EWAHBoolArray::operator!=(const EWAHBoolArray &x) const { + return !(*this == x); } -template -bool EWAHBoolArray::operator==(const 
BoolArray & x) const { - // could be more efficient - return (this->toBoolArray() == x); +template +bool EWAHBoolArray::operator==(const BoolArray &x) const { + // could be more efficient + return (this->toBoolArray() == x); } -template -bool EWAHBoolArray::operator!=(const BoolArray & x) const { - // could be more efficient - return (this->toBoolArray() != x); +template +bool EWAHBoolArray::operator!=(const BoolArray &x) const { + // could be more efficient + return (this->toBoolArray() != x); } -template -size_t EWAHBoolArray::addStreamOfEmptyWords(const bool v, size_t number) { - if (number == 0) - return 0; - sizeinbits += number * wordinbits; - size_t wordsadded = 0; - if ((RunningLengthWord::getRunningBit(buffer[lastRLW]) != v) - && (RunningLengthWord::size(buffer[lastRLW]) == 0)) { - RunningLengthWord::setRunningBit(buffer[lastRLW], v); - } else if ((RunningLengthWord::getNumberOfLiteralWords( - buffer[lastRLW]) != 0) || (RunningLengthWord::getRunningBit( - buffer[lastRLW]) != v)) { - buffer.push_back(0); - ++wordsadded; - lastRLW = buffer.size() - 1; - if (v) - RunningLengthWord::setRunningBit(buffer[lastRLW], v); - } - const uword runlen = RunningLengthWord::getRunningLength( - buffer[lastRLW]); - - const uword - whatwecanadd = - number - < static_cast (RunningLengthWord::largestrunninglengthcount - - runlen) ? 
static_cast (number) - : static_cast (RunningLengthWord::largestrunninglengthcount - - runlen); +template +size_t EWAHBoolArray::addStreamOfEmptyWords(const bool v, + size_t number) { + if (number == 0) + return 0; + sizeinbits += number * wordinbits; + size_t wordsadded = 0; + if ((RunningLengthWord::getRunningBit(buffer[lastRLW]) != v) && + (RunningLengthWord::size(buffer[lastRLW]) == 0)) { + RunningLengthWord::setRunningBit(buffer[lastRLW], v); + } else if ((RunningLengthWord::getNumberOfLiteralWords( + buffer[lastRLW]) != 0) || + (RunningLengthWord::getRunningBit(buffer[lastRLW]) != v)) { + buffer.push_back(0); + ++wordsadded; + lastRLW = buffer.size() - 1; + if (v) + RunningLengthWord::setRunningBit(buffer[lastRLW], v); + } + const uword runlen = + RunningLengthWord::getRunningLength(buffer[lastRLW]); + + const uword whatwecanadd = + number < static_cast( + RunningLengthWord::largestrunninglengthcount - runlen) + ? static_cast(number) + : static_cast( + RunningLengthWord::largestrunninglengthcount - runlen); + RunningLengthWord::setRunningLength( + buffer[lastRLW], static_cast(runlen + whatwecanadd)); + + number -= static_cast(whatwecanadd); + while (number >= RunningLengthWord::largestrunninglengthcount) { + buffer.push_back(0); + ++wordsadded; + lastRLW = buffer.size() - 1; + if (v) + RunningLengthWord::setRunningBit(buffer[lastRLW], v); + RunningLengthWord::setRunningLength( + buffer[lastRLW], RunningLengthWord::largestrunninglengthcount); + number -= static_cast( + RunningLengthWord::largestrunninglengthcount); + } + if (number > 0) { + buffer.push_back(0); + ++wordsadded; + lastRLW = buffer.size() - 1; + if (v) + RunningLengthWord::setRunningBit(buffer[lastRLW], v); RunningLengthWord::setRunningLength(buffer[lastRLW], - static_cast (runlen + whatwecanadd)); - - number -= static_cast (whatwecanadd); - while (number >= RunningLengthWord::largestrunninglengthcount) { - buffer.push_back(0); - ++wordsadded; - lastRLW = buffer.size() - 1; - if (v) - 
RunningLengthWord::setRunningBit(buffer[lastRLW], v); - RunningLengthWord::setRunningLength(buffer[lastRLW], - RunningLengthWord::largestrunninglengthcount); - number - -= static_cast (RunningLengthWord::largestrunninglengthcount); - } - if (number > 0) { - buffer.push_back(0); - ++wordsadded; - lastRLW = buffer.size() - 1; - if (v) - RunningLengthWord::setRunningBit(buffer[lastRLW], v); - RunningLengthWord::setRunningLength(buffer[lastRLW], - static_cast (number)); - } - return wordsadded; + static_cast(number)); + } + return wordsadded; } - -template -void EWAHBoolArray::fastaddStreamOfEmptyWords(const bool v, size_t number) { - if ((RunningLengthWord::getRunningBit(buffer[lastRLW]) != v) - && (RunningLengthWord::size(buffer[lastRLW]) == 0)) { - RunningLengthWord::setRunningBit(buffer[lastRLW], v); - } else if ((RunningLengthWord::getNumberOfLiteralWords( - buffer[lastRLW]) != 0) || (RunningLengthWord::getRunningBit( - buffer[lastRLW]) != v)) { - buffer.push_back(0); - lastRLW = buffer.size() - 1; - if (v) - RunningLengthWord::setRunningBit(buffer[lastRLW], v); - } - const uword runlen = RunningLengthWord::getRunningLength( - buffer[lastRLW]); - - const uword - whatwecanadd = - number - < static_cast (RunningLengthWord::largestrunninglengthcount - - runlen) ? 
static_cast (number) - : static_cast (RunningLengthWord::largestrunninglengthcount - - runlen); +template +void EWAHBoolArray::fastaddStreamOfEmptyWords(const bool v, + size_t number) { + if (number == 0) + return; + if ((RunningLengthWord::getRunningBit(buffer[lastRLW]) != v) && + (RunningLengthWord::size(buffer[lastRLW]) == 0)) { + RunningLengthWord::setRunningBit(buffer[lastRLW], v); + } else if ((RunningLengthWord::getNumberOfLiteralWords( + buffer[lastRLW]) != 0) || + (RunningLengthWord::getRunningBit(buffer[lastRLW]) != v)) { + buffer.push_back(0); + lastRLW = buffer.size() - 1; + if (v) + RunningLengthWord::setRunningBit(buffer[lastRLW], v); + } + const uword runlen = + RunningLengthWord::getRunningLength(buffer[lastRLW]); + + const uword whatwecanadd = + number < static_cast( + RunningLengthWord::largestrunninglengthcount - runlen) + ? static_cast(number) + : static_cast( + RunningLengthWord::largestrunninglengthcount - runlen); + RunningLengthWord::setRunningLength( + buffer[lastRLW], static_cast(runlen + whatwecanadd)); + + number -= static_cast(whatwecanadd); + while (number >= RunningLengthWord::largestrunninglengthcount) { + buffer.push_back(0); + lastRLW = buffer.size() - 1; + if (v) + RunningLengthWord::setRunningBit(buffer[lastRLW], v); + RunningLengthWord::setRunningLength( + buffer[lastRLW], RunningLengthWord::largestrunninglengthcount); + number -= static_cast( + RunningLengthWord::largestrunninglengthcount); + } + if (number > 0) { + buffer.push_back(0); + lastRLW = buffer.size() - 1; + if (v) + RunningLengthWord::setRunningBit(buffer[lastRLW], v); RunningLengthWord::setRunningLength(buffer[lastRLW], - static_cast (runlen + whatwecanadd)); - - number -= static_cast (whatwecanadd); - while (number >= RunningLengthWord::largestrunninglengthcount) { - buffer.push_back(0); - lastRLW = buffer.size() - 1; - if (v) - RunningLengthWord::setRunningBit(buffer[lastRLW], v); - RunningLengthWord::setRunningLength(buffer[lastRLW], - 
RunningLengthWord::largestrunninglengthcount); - number - -= static_cast (RunningLengthWord::largestrunninglengthcount); - } - if (number > 0) { - buffer.push_back(0); - lastRLW = buffer.size() - 1; - if (v) - RunningLengthWord::setRunningBit(buffer[lastRLW], v); - RunningLengthWord::setRunningLength(buffer[lastRLW], - static_cast (number)); - } + static_cast(number)); + } } +template +size_t EWAHBoolArray::addStreamOfDirtyWords(const uword *v, + const size_t number) { + if (number == 0) + return 0; + uword rlw = buffer[lastRLW]; + size_t NumberOfLiteralWords = + RunningLengthWord::getNumberOfLiteralWords(rlw); + if (NumberOfLiteralWords + number <= + RunningLengthWord::largestliteralcount) { + RunningLengthWord::setNumberOfLiteralWords( + rlw, NumberOfLiteralWords + number); + buffer[lastRLW] = rlw; + sizeinbits += number * wordinbits; + buffer.insert(buffer.end(), v, v + number); + return number; + } + // we proceed the long way + size_t howmanywecanadd = + RunningLengthWord::largestliteralcount - NumberOfLiteralWords; + RunningLengthWord::setNumberOfLiteralWords( + rlw, RunningLengthWord::largestliteralcount); + buffer[lastRLW] = rlw; + buffer.insert(buffer.end(), v, v + howmanywecanadd); + size_t wordadded = howmanywecanadd; + sizeinbits += howmanywecanadd * wordinbits; + buffer.push_back(0); + lastRLW = buffer.size() - 1; + ++wordadded; + wordadded += + addStreamOfDirtyWords(v + howmanywecanadd, number - howmanywecanadd); + return wordadded; +} -template -size_t EWAHBoolArray::addStreamOfDirtyWords(const uword * v, - const size_t number) { - if (number == 0) - return 0; - RunningLengthWord lastRunningLengthWord(buffer[lastRLW]); - const uword NumberOfLiteralWords = - lastRunningLengthWord.getNumberOfLiteralWords(); -#ifdef EWAHASSERT - assert( - RunningLengthWord::largestliteralcount - >= NumberOfLiteralWords); -#endif - const size_t - whatwecanadd = - number - < static_cast (RunningLengthWord::largestliteralcount - - NumberOfLiteralWords) ? 
number - : static_cast (RunningLengthWord::largestliteralcount - - NumberOfLiteralWords);//0x7FFF-NumberOfLiteralWords); -#ifdef EWAHASSERT - assert(NumberOfLiteralWords + whatwecanadd >= NumberOfLiteralWords); - assert( - NumberOfLiteralWords + whatwecanadd - <= RunningLengthWord::largestliteralcount); -#endif - lastRunningLengthWord.setNumberOfLiteralWords( - static_cast (NumberOfLiteralWords + whatwecanadd)); -#ifdef EWAHASSERT - assert( - lastRunningLengthWord.getNumberOfLiteralWords() - == NumberOfLiteralWords + whatwecanadd); -#endif - const size_t leftovernumber = number - whatwecanadd; - // add the dirty words... - const size_t oldsize(buffer.size()); - buffer.resize(oldsize + whatwecanadd); - memcpy(&buffer[oldsize], v, whatwecanadd * sizeof(uword)); - sizeinbits += whatwecanadd * wordinbits; - size_t wordsadded(whatwecanadd); - if (leftovernumber > 0) { - //add - buffer.push_back(0); - lastRLW = buffer.size() - 1; - ++wordsadded; - wordsadded += addStreamOfDirtyWords(v + whatwecanadd, leftovernumber); - } -#ifdef EWAHASSERT - assert(wordsadded >= number); -#endif - return wordsadded; +template +void EWAHBoolArray::fastaddStreamOfDirtyWords(const uword *v, + const size_t number) { + if (number == 0) + return; + uword rlw = buffer[lastRLW]; + size_t NumberOfLiteralWords = + RunningLengthWord::getNumberOfLiteralWords(rlw); + if (NumberOfLiteralWords + number <= + RunningLengthWord::largestliteralcount) { + RunningLengthWord::setNumberOfLiteralWords( + rlw, NumberOfLiteralWords + number); + buffer[lastRLW] = rlw; + for (size_t i = 0; i < number; ++i) + buffer.push_back(v[i]); + // buffer.insert(buffer.end(), v, v+number); // seems slower than push_back? 
+ return; + } + // we proceed the long way + size_t howmanywecanadd = + RunningLengthWord::largestliteralcount - NumberOfLiteralWords; + RunningLengthWord::setNumberOfLiteralWords( + rlw, RunningLengthWord::largestliteralcount); + buffer[lastRLW] = rlw; + for (size_t i = 0; i < howmanywecanadd; ++i) + buffer.push_back(v[i]); + // buffer.insert(buffer.end(), v, v+howmanywecanadd);// seems slower than + // push_back? + buffer.push_back(0); + lastRLW = buffer.size() - 1; + fastaddStreamOfDirtyWords(v + howmanywecanadd, number - howmanywecanadd); } -template -size_t EWAHBoolArray::addStreamOfNegatedDirtyWords(const uword * v, - const size_t number) { - if (number == 0) - return 0; - RunningLengthWord lastRunningLengthWord(buffer[lastRLW]); - const uword NumberOfLiteralWords = - lastRunningLengthWord.getNumberOfLiteralWords(); -#ifdef EWAHASSERT - assert( - RunningLengthWord::largestliteralcount - >= NumberOfLiteralWords); -#endif - const size_t - whatwecanadd = - number - < static_cast (RunningLengthWord::largestliteralcount - - NumberOfLiteralWords) ? number - : static_cast (RunningLengthWord::largestliteralcount - - NumberOfLiteralWords);//0x7FFF-NumberOfLiteralWords); -#ifdef EWAHASSERT - assert(NumberOfLiteralWords + whatwecanadd >= NumberOfLiteralWords); - assert( - NumberOfLiteralWords + whatwecanadd - <= RunningLengthWord::largestliteralcount); -#endif - lastRunningLengthWord.setNumberOfLiteralWords( - static_cast (NumberOfLiteralWords + whatwecanadd)); -#ifdef EWAHASSERT - assert( - lastRunningLengthWord.getNumberOfLiteralWords() - == NumberOfLiteralWords + whatwecanadd); -#endif - const size_t leftovernumber = number - whatwecanadd; - // add the dirty words... 
- const size_t oldsize(buffer.size()); - buffer.resize(oldsize + whatwecanadd); - for(size_t k = 0; k 0) { - //add - buffer.push_back(0); - lastRLW = buffer.size() - 1; - ++wordsadded; - wordsadded += addStreamOfDirtyWords(v + whatwecanadd, leftovernumber); - } -#ifdef EWAHASSERT - assert(wordsadded >= number); -#endif - return wordsadded; +template +size_t EWAHBoolArray::addStreamOfNegatedDirtyWords(const uword *v, + const size_t number) { + if (number == 0) + return 0; + uword rlw = buffer[lastRLW]; + size_t NumberOfLiteralWords = + RunningLengthWord::getNumberOfLiteralWords(rlw); + if (NumberOfLiteralWords + number <= + RunningLengthWord::largestliteralcount) { + RunningLengthWord::setNumberOfLiteralWords( + rlw, NumberOfLiteralWords + number); + buffer[lastRLW] = rlw; + sizeinbits += number * wordinbits; + for (size_t k = 0; k < number; ++k) + buffer.push_back(~v[k]); + return number; + } + // we proceed the long way + size_t howmanywecanadd = + RunningLengthWord::largestliteralcount - NumberOfLiteralWords; + RunningLengthWord::setNumberOfLiteralWords( + rlw, RunningLengthWord::largestliteralcount); + buffer[lastRLW] = rlw; + for (size_t k = 0; k < howmanywecanadd; ++k) + buffer.push_back(~v[k]); + size_t wordadded = howmanywecanadd; + sizeinbits += howmanywecanadd * wordinbits; + buffer.push_back(0); + lastRLW = buffer.size() - 1; + ++wordadded; + wordadded += + addStreamOfDirtyWords(v + howmanywecanadd, number - howmanywecanadd); + return wordadded; } -template -size_t EWAHBoolArray::addEmptyWord(const bool v) { - RunningLengthWord lastRunningLengthWord(buffer[lastRLW]); - const bool noliteralword = (lastRunningLengthWord.getNumberOfLiteralWords() - == 0); - //first, if the last running length word is empty, we align it - // this - uword runlen = lastRunningLengthWord.getRunningLength(); - if ((noliteralword) && (runlen == 0)) { - lastRunningLengthWord.setRunningBit(v); -#ifdef EWAHASSERT - assert(lastRunningLengthWord.getRunningBit() == v); -#endif - } - if 
((noliteralword) && (lastRunningLengthWord.getRunningBit() == v) - && (runlen < RunningLengthWord::largestrunninglengthcount)) { - lastRunningLengthWord.setRunningLength(static_cast (runlen + 1)); -#ifdef EWAHASSERT - assert(lastRunningLengthWord.getRunningLength() == runlen + 1); -#endif - return 0; - } else { - // we have to start anew - buffer.push_back(0); - lastRLW = buffer.size() - 1; - RunningLengthWord lastRunningLengthWord2(buffer[lastRLW]); -#ifdef EWAHASSERT - assert(lastRunningLengthWord2.getRunningLength() == 0); - assert(lastRunningLengthWord2.getRunningBit() == 0); - assert(lastRunningLengthWord2.getNumberOfLiteralWords() == 0); -#endif - lastRunningLengthWord2.setRunningBit(v); -#ifdef EWAHASSERT - assert(lastRunningLengthWord2.getRunningBit() == v); -#endif - lastRunningLengthWord2.setRunningLength(1); -#ifdef EWAHASSERT - assert(lastRunningLengthWord2.getRunningLength() == 1); - assert(lastRunningLengthWord2.getNumberOfLiteralWords() == 0); -#endif - return 1; - } +template size_t EWAHBoolArray::addEmptyWord(const bool v) { + RunningLengthWord lastRunningLengthWord(buffer[lastRLW]); + const bool noliteralword = + (lastRunningLengthWord.getNumberOfLiteralWords() == 0); + // first, if the last running length word is empty, we align it + // this + uword runlen = lastRunningLengthWord.getRunningLength(); + if ((noliteralword) && (runlen == 0)) { + lastRunningLengthWord.setRunningBit(v); + } + if ((noliteralword) && (lastRunningLengthWord.getRunningBit() == v) && + (runlen < RunningLengthWord::largestrunninglengthcount)) { + lastRunningLengthWord.setRunningLength(static_cast(runlen + 1)); + return 0; + } else { + // we have to start anew + buffer.push_back(0); + lastRLW = buffer.size() - 1; + RunningLengthWord lastRunningLengthWord2(buffer[lastRLW]); + lastRunningLengthWord2.setRunningBit(v); + lastRunningLengthWord2.setRunningLength(1); + return 1; + } } +template +void fast_logicalor_tocontainer(size_t n, const EWAHBoolArray **inputs, + EWAHBoolArray 
&container) { + class EWAHBoolArrayPtr { -template -void EWAHBoolArray::logicalor(const EWAHBoolArray &a, EWAHBoolArray &container) const { - container.reset(); - if (RESERVEMEMORY) - container.buffer.reserve(buffer.size() + a.buffer.size()); - EWAHBoolArrayRawIterator i = a.raw_iterator(); - EWAHBoolArrayRawIterator j = raw_iterator(); - if (!(i.hasNext() and j.hasNext())) {// hopefully this never happens... - container.setSizeInBits(sizeInBits()); - return; - } - // at this point, this should be safe: - BufferedRunningLengthWord & rlwi = i.next(); - BufferedRunningLengthWord & rlwj = j.next(); - - while ((rlwi.size() > 0) && (rlwj.size() > 0)) { - while ((rlwi.getRunningLength() > 0) - || (rlwj.getRunningLength() > 0)) { - const bool i_is_prey = rlwi - .getRunningLength() < rlwj - .getRunningLength(); - BufferedRunningLengthWord & prey = i_is_prey ? rlwi - : rlwj; - BufferedRunningLengthWord & predator = i_is_prey ? rlwj - : rlwi; - if (predator.getRunningBit()) { - container.addStreamOfEmptyWords(true, - predator.getRunningLength()); - prey.discardFirstWordsWithReload(predator - .getRunningLength()); - } else { - const size_t index = prey.discharge(container, - predator.getRunningLength()); - container.addStreamOfEmptyWords(false, - predator.getRunningLength() - - index - ); - } - predator.discardRunningWordsWithReload(); - } - const size_t nbre_literal = min( - rlwi.getNumberOfLiteralWords(), - rlwj.getNumberOfLiteralWords()); - if (nbre_literal > 0) { - for (size_t k = 0; k < nbre_literal; ++k) { - container.addWord(rlwi.getLiteralWordAt(k) - | rlwj.getLiteralWordAt(k)); - } - rlwi.discardFirstWordsWithReload(nbre_literal); - rlwj.discardFirstWordsWithReload(nbre_literal); - } + public: + EWAHBoolArrayPtr(const EWAHBoolArray *p, bool o) : ptr(p), own(o) {} + const EWAHBoolArray *ptr; + bool own; // whether to clean + + bool operator<(const EWAHBoolArrayPtr &o) const { + return o.ptr->sizeInBytes() < ptr->sizeInBytes(); // backward on purpose } - const bool 
i_remains = rlwi.size() > 0; - BufferedRunningLengthWord & remaining = i_remains ? rlwi - : rlwj; - remaining.discharge(container); + }; + if (n == 0) { + container.reset(); + return; + } + if (n == 1) { + container = *inputs[0]; + return; + } + std::priority_queue pq; + for (size_t i = 0; i < n; i++) { + // could use emplace + pq.push(EWAHBoolArrayPtr(inputs[i], false)); + } + while (pq.size() > 2) { + + EWAHBoolArrayPtr x1 = pq.top(); + pq.pop(); + + EWAHBoolArrayPtr x2 = pq.top(); + pq.pop(); + + EWAHBoolArray *buffer = new EWAHBoolArray(); + x1.ptr->logicalor(*x2.ptr, *buffer); + + if (x1.own) { + delete x1.ptr; + } + if (x2.own) { + delete x2.ptr; + } + pq.push(EWAHBoolArrayPtr(buffer, true)); + } + EWAHBoolArrayPtr x1 = pq.top(); + pq.pop(); + + EWAHBoolArrayPtr x2 = pq.top(); + pq.pop(); + + x1.ptr->logicalor(*x2.ptr, container); + + if (x1.own) { + delete x1.ptr; + } + if (x2.own) { + delete x2.ptr; + } } -template -void EWAHBoolArray::logicalxor(const EWAHBoolArray &a, EWAHBoolArray &container) const { - container.reset(); - if (RESERVEMEMORY) - container.buffer.reserve(buffer.size() + a.buffer.size()); - EWAHBoolArrayRawIterator i = a.raw_iterator(); - EWAHBoolArrayRawIterator j = raw_iterator(); - if (!(i.hasNext() and j.hasNext())) {// hopefully this never happens... - container.setSizeInBits(sizeInBits()); - return; +template +void EWAHBoolArray::logicalor(const EWAHBoolArray &a, + EWAHBoolArray &container) const { + container.reset(); + if (RESERVEMEMORY) + container.buffer.reserve(buffer.size() + a.buffer.size()); + EWAHBoolArrayRawIterator i = a.raw_iterator(); + EWAHBoolArrayRawIterator j = raw_iterator(); + if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens... 
+ container.setSizeInBits(sizeInBits()); + return; + } + // at this point, this should be safe: + BufferedRunningLengthWord &rlwi = i.next(); + BufferedRunningLengthWord &rlwj = j.next(); + + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); + BufferedRunningLengthWord &prey = i_is_prey ? rlwi : rlwj; + BufferedRunningLengthWord &predator = i_is_prey ? rlwj : rlwi; + if (predator.getRunningBit()) { + container.fastaddStreamOfEmptyWords(true, predator.getRunningLength()); + prey.discardFirstWordsWithReload(predator.getRunningLength()); + } else { + const size_t index = + prey.discharge(container, predator.getRunningLength()); + container.fastaddStreamOfEmptyWords(false, predator.getRunningLength() - + index); + } + predator.discardRunningWordsWithReload(); } - // at this point, this should be safe: - BufferedRunningLengthWord & rlwi = i.next(); - BufferedRunningLengthWord & rlwj = j.next(); - - - while ((rlwi.size() > 0) && (rlwj.size() > 0)) { - while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { - const bool i_is_prey = rlwi.getRunningLength() < rlwj .getRunningLength(); - BufferedRunningLengthWord & prey = i_is_prey ? rlwi : rlwj; - BufferedRunningLengthWord & predator = i_is_prey ? rlwj : rlwi; - const size_t index = (!predator.getRunningBit()) ? 
prey.discharge(container, - predator.getRunningLength()) : prey.dischargeNegated(container, - predator.getRunningLength()); - container.addStreamOfEmptyWords(predator.getRunningBit(), predator.getRunningLength() - index); - predator.discardRunningWordsWithReload(); - } - const size_t nbre_literal = min(rlwi.getNumberOfLiteralWords(),rlwj.getNumberOfLiteralWords()); - if (nbre_literal > 0) { - for (size_t k = 0; k < nbre_literal; ++k) - container.addWord(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); - rlwi.discardFirstWordsWithReload(nbre_literal); - rlwj.discardFirstWordsWithReload(nbre_literal); - } + + const size_t nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (size_t k = 0; k < nbre_literal; ++k) { + container.addWord(rlwi.getLiteralWordAt(k) | rlwj.getLiteralWordAt(k)); + } + rlwi.discardLiteralWordsWithReload(nbre_literal); + rlwj.discardLiteralWordsWithReload(nbre_literal); + } + } + const bool i_remains = rlwi.size() > 0; + BufferedRunningLengthWord &remaining = i_remains ? rlwi : rlwj; + remaining.discharge(container); + container.setSizeInBits(sizeInBits() > a.sizeInBits() ? sizeInBits() : a.sizeInBits()); +} + +template +size_t EWAHBoolArray::logicalorcount(const EWAHBoolArray &a) const { + size_t answer = 0; + EWAHBoolArrayRawIterator i = a.raw_iterator(); + EWAHBoolArrayRawIterator j = raw_iterator(); + if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens... + return 0; + } + // at this point, this should be safe: + BufferedRunningLengthWord &rlwi = i.next(); + BufferedRunningLengthWord &rlwj = j.next(); + + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); + BufferedRunningLengthWord &prey = i_is_prey ? rlwi : rlwj; + BufferedRunningLengthWord &predator = i_is_prey ? 
rlwj : rlwi; + if (predator.getRunningBit()) { + answer += predator.getRunningLength() * wordinbits; + prey.discardFirstWordsWithReload(predator.getRunningLength()); + + } else { + // const size_t index = + prey.dischargeCount(predator.getRunningLength(), &answer); + } + predator.discardRunningWordsWithReload(); } - const bool i_remains = rlwi.size() > 0; - BufferedRunningLengthWord & remaining = i_remains ? rlwi : rlwj; - remaining.discharge(container); - /// container.setSizeInBitsWithinLastWord(Math.max(sizeInBits(), a.sizeInBits())); + + const size_t nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (size_t k = 0; k < nbre_literal; ++k) { + answer += countOnes( + (uword)(rlwi.getLiteralWordAt(k) | rlwj.getLiteralWordAt(k))); + } + rlwi.discardLiteralWordsWithReload(nbre_literal); + rlwj.discardLiteralWordsWithReload(nbre_literal); + } + } + const bool i_remains = rlwi.size() > 0; + BufferedRunningLengthWord &remaining = i_remains ? rlwi : rlwj; + answer += remaining.dischargeCount(); + return answer; } +template +void EWAHBoolArray::logicalxor(const EWAHBoolArray &a, + EWAHBoolArray &container) const { + container.reset(); + if (RESERVEMEMORY) + container.buffer.reserve(buffer.size() + a.buffer.size()); + EWAHBoolArrayRawIterator i = a.raw_iterator(); + EWAHBoolArrayRawIterator j = raw_iterator(); + if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens... + container.setSizeInBits(sizeInBits()); + return; + } + // at this point, this should be safe: + BufferedRunningLengthWord &rlwi = i.next(); + BufferedRunningLengthWord &rlwj = j.next(); + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); + BufferedRunningLengthWord &prey = i_is_prey ? rlwi : rlwj; + BufferedRunningLengthWord &predator = i_is_prey ? 
rlwj : rlwi; + const size_t index = + (!predator.getRunningBit()) + ? prey.discharge(container, predator.getRunningLength()) + : prey.dischargeNegated(container, predator.getRunningLength()); + container.fastaddStreamOfEmptyWords(predator.getRunningBit(), + predator.getRunningLength() - index); + predator.discardRunningWordsWithReload(); + } + const size_t nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (size_t k = 0; k < nbre_literal; ++k) + container.addWord(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); + rlwi.discardLiteralWordsWithReload(nbre_literal); + rlwj.discardLiteralWordsWithReload(nbre_literal); + } + } + const bool i_remains = rlwi.size() > 0; + BufferedRunningLengthWord &remaining = i_remains ? rlwi : rlwj; + remaining.discharge(container); + container.setSizeInBits(sizeInBits() > a.sizeInBits() ? sizeInBits() : a.sizeInBits()); +} + +template +size_t EWAHBoolArray::logicalxorcount(const EWAHBoolArray &a) const { + EWAHBoolArrayRawIterator i = a.raw_iterator(); + EWAHBoolArrayRawIterator j = raw_iterator(); + if (!i.hasNext()) + return a.numberOfOnes(); + if (!j.hasNext()) + return this->numberOfOnes(); + + size_t answer = 0; + + // at this point, this should be safe: + BufferedRunningLengthWord &rlwi = i.next(); + BufferedRunningLengthWord &rlwj = j.next(); + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); + BufferedRunningLengthWord &prey = i_is_prey ? rlwi : rlwj; + BufferedRunningLengthWord &predator = i_is_prey ? 
rlwj : rlwi; + size_t index; + + if (predator.getRunningBit()) { + index = + prey.dischargeCountNegated(predator.getRunningLength(), &answer); + } else { + index = prey.dischargeCount(predator.getRunningLength(), &answer); + } + if (predator.getRunningBit()) + answer += (predator.getRunningLength() - index) * wordinbits; + + predator.discardRunningWordsWithReload(); + } + const size_t nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (size_t k = 0; k < nbre_literal; ++k) { + answer += countOnes( + (uword)(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k))); + } + rlwi.discardLiteralWordsWithReload(nbre_literal); + rlwj.discardLiteralWordsWithReload(nbre_literal); + } + } + const bool i_remains = rlwi.size() > 0; + BufferedRunningLengthWord &remaining = i_remains ? rlwi : rlwj; + answer += remaining.dischargeCount(); + return answer; +} -template +template void EWAHBoolArray::logicaland(const EWAHBoolArray &a, - EWAHBoolArray &container) const { - container.reset(); - if (RESERVEMEMORY) - container.buffer.reserve( - buffer.size() > a.buffer.size() ? buffer.size() - : a.buffer.size()); - EWAHBoolArrayRawIterator i = a.raw_iterator(); - EWAHBoolArrayRawIterator j = raw_iterator(); - if (!(i.hasNext() and j.hasNext())) {// hopefully this never happens... - container.setSizeInBits(sizeInBits()); - return; + EWAHBoolArray &container) const { + container.reset(); + if (RESERVEMEMORY) + container.buffer.reserve(buffer.size() > a.buffer.size() ? buffer.size() + : a.buffer.size()); + EWAHBoolArrayRawIterator i = a.raw_iterator(); + EWAHBoolArrayRawIterator j = raw_iterator(); + if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens... 
+ container.setSizeInBits(sizeInBits()); + return; + } + // at this point, this should be safe: + BufferedRunningLengthWord &rlwi = i.next(); + BufferedRunningLengthWord &rlwj = j.next(); + + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); + BufferedRunningLengthWord &prey(i_is_prey ? rlwi : rlwj); + BufferedRunningLengthWord &predator(i_is_prey ? rlwj : rlwi); + if (!predator.getRunningBit()) { + container.fastaddStreamOfEmptyWords(false, predator.getRunningLength()); + prey.discardFirstWordsWithReload(predator.getRunningLength()); + } else { + const size_t index = + prey.discharge(container, predator.getRunningLength()); + container.fastaddStreamOfEmptyWords(false, predator.getRunningLength() - + index); + } + predator.discardRunningWordsWithReload(); } - // at this point, this should be safe: - BufferedRunningLengthWord & rlwi = i.next(); - BufferedRunningLengthWord & rlwj = j.next(); - - while ((rlwi.size() > 0) && (rlwj.size() > 0)) { - while ((rlwi.getRunningLength() > 0) - || (rlwj.getRunningLength() > 0)) { - const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); - BufferedRunningLengthWord & prey(i_is_prey ? rlwi : rlwj); - BufferedRunningLengthWord & predator(i_is_prey ? 
rlwj : rlwi); - if (!predator.getRunningBit()) { - container.fastaddStreamOfEmptyWords(false, predator.getRunningLength()); - prey.discardFirstWordsWithReload(predator.getRunningLength()); - } else { - const size_t index = prey.discharge(container, predator.getRunningLength()); - container.fastaddStreamOfEmptyWords(false, predator.getRunningLength() - index); - } - predator.discardRunningWordsWithReload(); - } - const size_t nbre_literal = min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); - if (nbre_literal > 0) { - for (size_t k = 0; k < nbre_literal; ++k) { - container.addWord(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); - } - rlwi.discardFirstWordsWithReload(nbre_literal); - rlwj.discardFirstWordsWithReload(nbre_literal); - } + const size_t nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (size_t k = 0; k < nbre_literal; ++k) { + container.addWord(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); + } + rlwi.discardLiteralWordsWithReload(nbre_literal); + rlwj.discardLiteralWordsWithReload(nbre_literal); } + } + container.setSizeInBits(sizeInBits()); + container.setSizeInBits(sizeInBits() > a.sizeInBits() ? sizeInBits() : a.sizeInBits()); +} + +template +void EWAHBoolArray::logicalandnot(const EWAHBoolArray &a, + EWAHBoolArray &container) const { + container.reset(); + if (RESERVEMEMORY) + container.buffer.reserve(buffer.size() > a.buffer.size() ? buffer.size() + : a.buffer.size()); + EWAHBoolArrayRawIterator i = raw_iterator(); + EWAHBoolArrayRawIterator j = a.raw_iterator(); + if (!j.hasNext()) { // the other fellow is empty + container = *this; // just copy, stupidly, the data + return; + } + if (!(i.hasNext())) { // hopefully this never happens... 
container.setSizeInBits(sizeInBits()); + return; + } + // at this point, this should be safe: + BufferedRunningLengthWord &rlwi = i.next(); + BufferedRunningLengthWord &rlwj = j.next(); + + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); + BufferedRunningLengthWord &prey(i_is_prey ? rlwi : rlwj); + BufferedRunningLengthWord &predator(i_is_prey ? rlwj : rlwi); + if (((predator.getRunningBit()) && (i_is_prey)) || + ((!predator.getRunningBit()) && (!i_is_prey))) { + container.fastaddStreamOfEmptyWords(false, predator.getRunningLength()); + prey.discardFirstWordsWithReload(predator.getRunningLength()); + } else if (i_is_prey) { + const size_t index = + prey.discharge(container, predator.getRunningLength()); + container.fastaddStreamOfEmptyWords(false, predator.getRunningLength() - + index); + } else { + const size_t index = + prey.dischargeNegated(container, predator.getRunningLength()); + container.fastaddStreamOfEmptyWords(true, predator.getRunningLength() - + index); + } + predator.discardRunningWordsWithReload(); + } + const size_t nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (size_t k = 0; k < nbre_literal; ++k) { + container.addWord(rlwi.getLiteralWordAt(k) & ~rlwj.getLiteralWordAt(k)); + } + rlwi.discardLiteralWordsWithReload(nbre_literal); + rlwj.discardLiteralWordsWithReload(nbre_literal); + } + } + const bool i_remains = rlwi.size() > 0; + if (i_remains) { + rlwi.discharge(container); + } + container.setSizeInBits(sizeInBits()); +} + +template +size_t EWAHBoolArray::logicalandnotcount(const EWAHBoolArray &a) const { + EWAHBoolArrayRawIterator i = raw_iterator(); + EWAHBoolArrayRawIterator j = a.raw_iterator(); + if (!j.hasNext()) { // the other fellow is empty + return this->numberOfOnes(); + } + if (!(i.hasNext())) { // hopefully 
this never happens... + return 0; + } + size_t answer = 0; + // at this point, this should be safe: + BufferedRunningLengthWord &rlwi = i.next(); + BufferedRunningLengthWord &rlwj = j.next(); + + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); + BufferedRunningLengthWord &prey(i_is_prey ? rlwi : rlwj); + BufferedRunningLengthWord &predator(i_is_prey ? rlwj : rlwi); + if (((predator.getRunningBit()) && (i_is_prey)) || + ((!predator.getRunningBit()) && (!i_is_prey))) { + prey.discardFirstWordsWithReload(predator.getRunningLength()); + } else if (i_is_prey) { + prey.dischargeCount(predator.getRunningLength(), &answer); + } else { + const size_t index = + prey.dischargeCountNegated(predator.getRunningLength(), &answer); + answer += (predator.getRunningLength() - index) * wordinbits; + } + predator.discardRunningWordsWithReload(); + } + const size_t nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (size_t k = 0; k < nbre_literal; ++k) { + answer += countOnes( + (uword)(rlwi.getLiteralWordAt(k) & (~rlwj.getLiteralWordAt(k)))); + } + rlwi.discardLiteralWordsWithReload(nbre_literal); + rlwj.discardLiteralWordsWithReload(nbre_literal); + } + } + const bool i_remains = rlwi.size() > 0; + if (i_remains) { + answer += rlwi.dischargeCount(); + } + return answer; } +template +size_t EWAHBoolArray::logicalandcount(const EWAHBoolArray &a) const { + EWAHBoolArrayRawIterator i = a.raw_iterator(); + EWAHBoolArrayRawIterator j = raw_iterator(); + if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens... 
+ return 0; + } + size_t answer = 0; + // at this point, this should be safe: + BufferedRunningLengthWord &rlwi = i.next(); + BufferedRunningLengthWord &rlwj = j.next(); + + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); + BufferedRunningLengthWord &prey(i_is_prey ? rlwi : rlwj); + BufferedRunningLengthWord &predator(i_is_prey ? rlwj : rlwi); + if (!predator.getRunningBit()) { + prey.discardFirstWordsWithReload(predator.getRunningLength()); + } else { + // const size_t index = + prey.dischargeCount(predator.getRunningLength(), &answer); + } + predator.discardRunningWordsWithReload(); + } + const size_t nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (size_t k = 0; k < nbre_literal; ++k) { + answer += countOnes( + (uword)(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k))); + } + rlwi.discardLiteralWordsWithReload(nbre_literal); + rlwj.discardLiteralWordsWithReload(nbre_literal); + } + } + return answer; +} -template +template bool EWAHBoolArray::intersects(const EWAHBoolArray &a) const { -EWAHBoolArrayRawIterator i = a.raw_iterator(); -EWAHBoolArrayRawIterator j = raw_iterator(); -if (!(i.hasNext() and j.hasNext())) {// hopefully this never happens... + EWAHBoolArrayRawIterator i = a.raw_iterator(); + EWAHBoolArrayRawIterator j = raw_iterator(); + if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens... 
return false; -} -// at this point, this should be safe: -BufferedRunningLengthWord & rlwi = i.next(); -BufferedRunningLengthWord & rlwj = j.next(); + } + // at this point, this should be safe: + BufferedRunningLengthWord &rlwi = i.next(); + BufferedRunningLengthWord &rlwj = j.next(); -while ((rlwi.size() > 0) && (rlwj.size() > 0)) { - while ((rlwi.getRunningLength() > 0) - || (rlwj.getRunningLength() > 0)) { + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); - BufferedRunningLengthWord & prey(i_is_prey ? rlwi : rlwj); - BufferedRunningLengthWord & predator(i_is_prey ? rlwj : rlwi); - if (!predator.getRunningBit()) { - prey.discardFirstWordsWithReload(predator.getRunningLength()); - } else { - size_t index = 0; - bool isnonzero = prey.nonzero_discharge(predator.getRunningLength(),index); - if(isnonzero) return true; - } - predator.discardRunningWordsWithReload(); + BufferedRunningLengthWord &prey(i_is_prey ? rlwi : rlwj); + BufferedRunningLengthWord &predator(i_is_prey ? 
rlwj : rlwi); + if (!predator.getRunningBit()) { + prey.discardFirstWordsWithReload(predator.getRunningLength()); + } else { + size_t index = 0; + bool isnonzero = + prey.nonzero_discharge(predator.getRunningLength(), index); + if (isnonzero) + return true; + } + predator.discardRunningWordsWithReload(); } - const size_t nbre_literal = min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); + const size_t nbre_literal = std::min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); if (nbre_literal > 0) { - for (size_t k = 0; k < nbre_literal; ++k) { - if((rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k))!=0) return true; - } - rlwi.discardFirstWordsWithReload(nbre_literal); - rlwj.discardFirstWordsWithReload(nbre_literal); + for (size_t k = 0; k < nbre_literal; ++k) { + if ((rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)) != 0) + return true; + } + rlwi.discardLiteralWordsWithReload(nbre_literal); + rlwj.discardLiteralWordsWithReload(nbre_literal); } -} -return false; + } + return false; } -template +template BitmapStatistics EWAHBoolArray::computeStatistics() const { - //uint totalcompressed(0), totalliteral(0); - BitmapStatistics bs; - EWAHBoolArrayRawIterator i = raw_iterator(); - while (i.hasNext()) { - BufferedRunningLengthWord &brlw(i.next()); - ++bs.runningwordmarker; - bs.totalliteral += brlw.getNumberOfLiteralWords(); - bs.totalcompressed += brlw.getRunningLength(); - if (brlw.getRunningLength() - == RunningLengthWord::largestrunninglengthcount) { - ++bs.maximumofrunningcounterreached; - } - } - return bs; + BitmapStatistics bs; + EWAHBoolArrayRawIterator i = raw_iterator(); + while (i.hasNext()) { + BufferedRunningLengthWord &brlw(i.next()); + ++bs.runningwordmarker; + bs.totalliteral += brlw.getNumberOfLiteralWords(); + bs.totalcompressed += brlw.getRunningLength(); + if (brlw.getRunningLength() == + RunningLengthWord::largestrunninglengthcount) { + ++bs.maximumofrunningcounterreached; + } + } + return bs; } -template 
-void EWAHBoolArray::debugprintout() const { - cout << "==printing out EWAHBoolArray==" << endl; - cout << "Number of compressed words: " << buffer.size() << endl; - size_t pointer = 0; - while (pointer < buffer.size()) { - ConstRunningLengthWord rlw(buffer[pointer]); - bool b = rlw.getRunningBit(); - const uword rl = rlw.getRunningLength(); - const uword lw = rlw.getNumberOfLiteralWords(); - cout << "pointer = " << pointer << " running bit=" << b - << " running length=" << rl << " lit. words=" << lw << endl; - for (uword j = 0; j < lw; ++j) { - const uword & w = buffer[pointer + j + 1]; - cout << toBinaryString(w) << endl; - } - pointer += lw + 1; - } - cout << "==END==" << endl; +template void EWAHBoolArray::debugprintout() const { + std::cout << "==printing out EWAHBoolArray==" << std::endl; + std::cout << "Number of compressed words: " << buffer.size() << std::endl; + size_t pointer = 0; + while (pointer < buffer.size()) { + ConstRunningLengthWord rlw(buffer[pointer]); + bool b = rlw.getRunningBit(); + const uword rl = rlw.getRunningLength(); + const uword lw = rlw.getNumberOfLiteralWords(); + std::cout << "pointer = " << pointer << " running bit=" << b + << " running length=" << rl << " lit. words=" << lw << std::endl; + for (uword j = 0; j < lw; ++j) { + const uword &w = buffer[pointer + j + 1]; + std::cout << toBinaryString(w) << std::endl; + } + pointer += lw + 1; + } + std::cout << "==END==" << std::endl; } -template -size_t EWAHBoolArray::sizeOnDisk() const { - return sizeof(sizeinbits) + sizeof(size_t) + sizeof(uword) * buffer.size(); +template +size_t EWAHBoolArray::sizeOnDisk(const bool savesizeinbits) const { + return (savesizeinbits ? 
sizeof(sizeinbits) : 0) + sizeof(size_t) + + sizeof(uword) * buffer.size(); } #endif diff --git a/yt/utilities/lib/ewahboolarray/ewahutil.h b/yt/utilities/lib/ewahboolarray/ewahutil.h index 0d5231b7d4e..1d5ee30a0f0 100644 --- a/yt/utilities/lib/ewahboolarray/ewahutil.h +++ b/yt/utilities/lib/ewahboolarray/ewahutil.h @@ -25,226 +25,216 @@ #include #include -#if defined(_WIN64) -#include -#endif - // taken from stackoverflow #ifndef NDEBUG -# define ASSERT(condition, message) \ - do { \ - if (! (condition)) { \ - std::cerr << "Assertion `" #condition "` failed in " << __FILE__ \ - << " line " << __LINE__ << ": " << message << std::endl; \ - std::exit(EXIT_FAILURE); \ - } \ - } while (false) +#define ASSERT(condition, message) \ + do { \ + if (!(condition)) { \ + std::cerr << "Assertion `" #condition "` failed in " << __FILE__ \ + << " line " << __LINE__ << ": " << message << std::endl; \ + std::exit(EXIT_FAILURE); \ + } \ + } while (false) #else -# define ASSERT(condition, message) do { } while (false) +#define ASSERT(condition, message) \ + do { \ + } while (false) #endif - +#ifdef _MSC_VER +#include +#endif static inline uint32_t ctz64(uint64_t n) { -#if defined(__GNUC__) && UINT_MAX >= UINT32_MAX - return static_cast(__builtin_ctzl(n)); -#elif defined(_WIN64) && defined(_MSC_VER) && _MSC_VER >= 1400 - uint32_t i; - _BitScanForward64((unsigned long *) &i, n); - return i; +#if defined(__GNUC__) && UINT_MAX >= UINT32_MAX && ULLONG_MAX >= UINT64_MAX + return static_cast(__builtin_ctzll(n)); +#elif defined(_WIN64) && defined(_MSC_VER) && _MSC_VER >= 1400 && \ + ULONG_MAX >= UINT64_MAX + uint32_t i; + _BitScanForward64((unsigned long *)&i, n); + return i; #else - uint32_t i = 1; - if ((n & static_cast(4294967295)) == 0) { - n >>= 32; - i += 32; - } - if ((n & static_cast(0x0000FFFFUL)) == 0) { - n >>= 16; - i += 16; - } - - if ((n & static_cast(0x000000FFUL)) == 0) { - n >>= 8; - i += 8; - } - - if ((n & static_cast(0x0000000FUL)) == 0) { - n >>= 4; - i += 4; - } - - if 
((n & static_cast(0x00000003UL)) == 0) { - n >>= 2; - i += 2; - } - i -= (n & 0x1); - return i; + uint32_t i = 1; + if ((n & static_cast(4294967295)) == 0) { + n >>= 32; + i += 32; + } + if ((n & static_cast(0x0000FFFFUL)) == 0) { + n >>= 16; + i += 16; + } + + if ((n & static_cast(0x000000FFUL)) == 0) { + n >>= 8; + i += 8; + } + + if ((n & static_cast(0x0000000FUL)) == 0) { + n >>= 4; + i += 4; + } + + if ((n & static_cast(0x00000003UL)) == 0) { + n >>= 2; + i += 2; + } + i -= (n & 0x1); + return i; #endif } - - - static inline uint32_t ctz32(uint32_t n) { #if defined(__GNUC__) && UINT_MAX >= UINT32_MAX - return static_cast(__builtin_ctz(n)); + return static_cast(__builtin_ctz(n)); #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) - uint32_t i; - __asm__("bsfl %1, %0" : "=r" (i) : "rm" (n)); - return i; + uint32_t i; + __asm__("bsfl %1, %0" : "=r"(i) : "rm"(n)); + return i; #elif defined(_MSC_VER) && _MSC_VER >= 1400 - uint32_t i; - _BitScanForward((unsigned long *) &i, n); - return i; + uint32_t i; + _BitScanForward((unsigned long *)&i, n); + return i; #else - uint32_t i = 1; + uint32_t i = 1; - if ((n & static_cast(0x0000FFFF)) == 0) { - n >>= 16; - i += 16; - } + if ((n & static_cast(0x0000FFFF)) == 0) { + n >>= 16; + i += 16; + } - if ((n & static_cast(0x000000FF)) == 0) { - n >>= 8; - i += 8; - } + if ((n & static_cast(0x000000FF)) == 0) { + n >>= 8; + i += 8; + } - if ((n & static_cast(0x0000000F)) == 0) { - n >>= 4; - i += 4; - } + if ((n & static_cast(0x0000000F)) == 0) { + n >>= 4; + i += 4; + } - if ((n & static_cast(0x00000003)) == 0) { - n >>= 2; - i += 2; - } + if ((n & static_cast(0x00000003)) == 0) { + n >>= 2; + i += 2; + } - i -= (n & 1); + i -= (n & 1); - return i; + return i; #endif } - static inline uint32_t ctz16(uint16_t n) { #if defined(__GNUC__) && UINT_MAX >= UINT32_MAX - return static_cast(__builtin_ctz(n)); + return static_cast(__builtin_ctz(n)); #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) - 
uint32_t i; - __asm__("bsfl %1, %0" : "=r" (i) : "rm" (n)); - return i; + uint32_t i; + __asm__("bsfl %1, %0" : "=r"(i) : "rm"(n)); + return i; #elif defined(_MSC_VER) && _MSC_VER >= 1400 - uint32_t i; - _BitScanForward((unsigned long *) &i, n); - return i; + uint32_t i; + _BitScanForward((unsigned long *)&i, n); + return i; #else - uint32_t i = 1; + uint32_t i = 1; - if ((n & static_cast(0x000000FF)) == 0) { - n >>= 8; - i += 8; - } + if ((n & static_cast(0x000000FF)) == 0) { + n >>= 8; + i += 8; + } - if ((n & static_cast(0x0000000F)) == 0) { - n >>= 4; - i += 4; - } + if ((n & static_cast(0x0000000F)) == 0) { + n >>= 4; + i += 4; + } - if ((n & static_cast(0x00000003)) == 0) { - n >>= 2; - i += 2; - } - i -= (n & 1); + if ((n & static_cast(0x00000003)) == 0) { + n >>= 2; + i += 2; + } + i -= (n & 1); - return i; + return i; #endif } - - - #ifdef __GNUC__ /** * count the number of bits set to one (32 bit version) */ inline uint32_t countOnes(uint32_t x) { - return static_cast(__builtin_popcount(x)); + return static_cast(__builtin_popcount(x)); } #elif defined(_MSC_VER) && _MSC_VER >= 1400 -inline uint32_t countOnes(uint32_t x) { - return __popcnt(x); -} +inline uint32_t countOnes(uint32_t x) { return __popcnt(x); } #else inline uint32_t countOnes(uint32_t v) { - v = v - ((v >> 1) & 0x55555555); - v = (v & 0x33333333) + ((v >> 2) & 0x33333333); - return static_cast((((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24); + v = v - ((v >> 1) & 0x55555555); + v = (v & 0x33333333) + ((v >> 2) & 0x33333333); + return static_cast((((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> + 24); } #endif - #ifdef __GNUC__ /** * count the number of bits set to one (64 bit version) */ inline uint32_t countOnes(uint64_t x) { - return static_cast(__builtin_popcountl(x)); + return static_cast(__builtin_popcountll(x)); } #elif defined(_WIN64) && defined(_MSC_VER) && _MSC_VER >= 1400 inline uint32_t countOnes(uint64_t x) { - return static_cast(__popcnt64(static_cast<__int64>(x))); + return 
static_cast(__popcnt64(static_cast<__int64>(x))); } #else inline uint32_t countOnes(uint64_t v) { - v = v - ((v >> 1) & 0x5555555555555555); - v = (v & 0x3333333333333333) + - ((v >> 2) & 0x3333333333333333); - v = ((v + (v >> 4)) & 0x0F0F0F0F0F0F0F0F); - return static_cast((v*(0x0101010101010101))>>56); + v = v - ((v >> 1) & 0x5555555555555555); + v = (v & 0x3333333333333333) + ((v >> 2) & 0x3333333333333333); + v = ((v + (v >> 4)) & 0x0F0F0F0F0F0F0F0F); + return static_cast((v * (0x0101010101010101)) >> 56); } #endif inline uint32_t countOnes(uint16_t v) { - return countOnes(static_cast(v)); + return countOnes(static_cast(v)); } - inline uint32_t numberOfTrailingZeros(uint32_t x) { - if (x == 0) return 32; - return ctz32(x); + if (x == 0) + return 32; + return ctz32(x); } - inline uint32_t numberOfTrailingZeros(uint64_t x) { - if (x == 0) return 64; - return ctz64(x); + if (x == 0) + return 64; + return ctz64(x); } inline uint32_t numberOfTrailingZeros(uint16_t x) { - if (x == 0) return 16; - return ctz16(x); + if (x == 0) + return 16; + return ctz16(x); } - /** * Returns the binary representation of a binary word. 
*/ -template -std::string toBinaryString(const uword w) { - std::ostringstream convert; - for (uint32_t k = 0; k < sizeof(uword) * 8; ++k) { - if (w & (static_cast (1) << k)) - convert << "1"; - else - convert << "0"; - } - return convert.str(); +template std::string toBinaryString(const uword w) { + std::ostringstream convert; + for (uint32_t k = 0; k < sizeof(uword) * 8; ++k) { + if (w & (static_cast(1) << k)) + convert << "1"; + else + convert << "0"; + } + return convert.str(); } #endif diff --git a/yt/utilities/lib/ewahboolarray/runninglengthword.h b/yt/utilities/lib/ewahboolarray/runninglengthword.h index 5ff441a62f8..85ccdf5ddad 100644 --- a/yt/utilities/lib/ewahboolarray/runninglengthword.h +++ b/yt/utilities/lib/ewahboolarray/runninglengthword.h @@ -7,474 +7,542 @@ #ifndef RUNNINGLENGTHWORD_H_ #define RUNNINGLENGTHWORD_H_ #include -using namespace std; /** * For expert users. * This class is used to represent a special type of word storing * a run length. It is defined by the Enhanced Word Aligned Hybrid (EWAH) * format. You don't normally need to access this class. */ -template -class RunningLengthWord { +template class RunningLengthWord { public: - RunningLengthWord(uword & data) : - mydata(data) { - } - - RunningLengthWord(const RunningLengthWord & rlw) : - mydata(rlw.mydata) { - } - - RunningLengthWord& operator=(const RunningLengthWord & rlw) { - mydata = rlw.mydata; - return *this; - } - - /** - * Which bit is being repeated? - */ - bool getRunningBit() const { - return mydata & static_cast (1); - } - - /** - * how many words should be filled by the running bit - */ - static inline bool getRunningBit(uword data) { - return data & static_cast (1); - } - - /** - * how many words should be filled by the running bit - */ - uword getRunningLength() const { - return static_cast((mydata >> 1) & largestrunninglengthcount); - } - - /** - * followed by how many literal words? 
- */ - static inline uword getRunningLength(uword data) { - return static_cast((data >> 1) & largestrunninglengthcount); - } - - /** - * followed by how many literal words? - */ - uword getNumberOfLiteralWords() const { - return static_cast (mydata >> (1 + runninglengthbits)); - } - - /** - * Total of getRunningLength() and getNumberOfLiteralWords() - */ - uword size() const { - return static_cast(getRunningLength() + getNumberOfLiteralWords()); - } - - - - /** - * Total of getRunningLength() and getNumberOfLiteralWords() - */ - static inline uword size(uword data) { - return static_cast(getRunningLength(data) + getNumberOfLiteralWords(data)); - } - - /** - * followed by how many literal words? - */ - static inline uword getNumberOfLiteralWords(uword data) { - return static_cast (data >> (1 + runninglengthbits)); - } - - /** - * running length of which type of bits - */ - void setRunningBit(bool b) { - if (b) - mydata |= static_cast (1); - else - mydata &= static_cast (~1); - } - - - void discardFirstWords(uword x) { -#ifdef EWAHASSERT - assert(x <= size()); -#endif - const uword rl(getRunningLength()); - if (rl >= x) { - setRunningLength(rl - x); - return; - } - x -= rl; - setRunningLength(0); - setNumberOfLiteralWords(getNumberOfLiteralWords() - x); - } - - /** - * running length of which type of bits - */ - static inline void setRunningBit(uword & data, bool b) { - if (b) - data |= static_cast (1); - else - data &= static_cast (~1); - } - - void setRunningLength(uword l) { - mydata |= shiftedlargestrunninglengthcount; - mydata &= static_cast ((l << 1) - | notshiftedlargestrunninglengthcount); - } - - // static call for people who hate objects - static inline void setRunningLength(uword & data, uword l) { - data |= shiftedlargestrunninglengthcount; - data &= static_cast ((l << 1) - | notshiftedlargestrunninglengthcount); - } - - void setNumberOfLiteralWords(uword l) { - mydata |= notrunninglengthplusrunningbit; - mydata &= static_cast ((l << (runninglengthbits + 
1)) - | runninglengthplusrunningbit); - } - // static call for people who hate objects - static inline void setNumberOfLiteralWords(uword & data, uword l) { - data |= notrunninglengthplusrunningbit; - data &= static_cast (l << (runninglengthbits + 1)) - | runninglengthplusrunningbit; - } - - - static const uint32_t runninglengthbits = sizeof(uword) * 4; - static const uint32_t literalbits = sizeof(uword) * 8 - 1 - runninglengthbits; - static const uword largestliteralcount = (static_cast (1) - << literalbits) - 1; - static const uword largestrunninglengthcount = (static_cast (1) - << runninglengthbits) - 1; - static const uword shiftedlargestrunninglengthcount = - largestrunninglengthcount << 1; - static const uword notshiftedlargestrunninglengthcount = - static_cast (~shiftedlargestrunninglengthcount); - static const uword runninglengthplusrunningbit = (static_cast (1) - << (runninglengthbits + 1)) - 1; - static const uword notrunninglengthplusrunningbit = - static_cast (~runninglengthplusrunningbit); - static const uword notlargestrunninglengthcount = - static_cast (~largestrunninglengthcount); - - uword & mydata; + RunningLengthWord(uword &data) : mydata(data) {} + + RunningLengthWord(const RunningLengthWord &rlw) : mydata(rlw.mydata) {} + + RunningLengthWord &operator=(const RunningLengthWord &rlw) { + mydata = rlw.mydata; + return *this; + } + + /** + * Which bit is being repeated? + */ + bool getRunningBit() const { return mydata & static_cast(1); } + + /** + * how many words should be filled by the running bit + */ + static inline bool getRunningBit(uword data) { + return data & static_cast(1); + } + + /** + * how many words should be filled by the running bit + */ + uword getRunningLength() const { + return static_cast((mydata >> 1) & largestrunninglengthcount); + } + + /** + * followed by how many literal words? 
+ */ + static inline uword getRunningLength(uword data) { + return static_cast((data >> 1) & largestrunninglengthcount); + } + + /** + * followed by how many literal words? + */ + uword getNumberOfLiteralWords() const { + return static_cast(mydata >> (1 + runninglengthbits)); + } + + /** + * Total of getRunningLength() and getNumberOfLiteralWords() + */ + uword size() const { + return static_cast(getRunningLength() + getNumberOfLiteralWords()); + } + + /** + * Total of getRunningLength() and getNumberOfLiteralWords() + */ + static inline uword size(uword data) { + return static_cast(getRunningLength(data) + + getNumberOfLiteralWords(data)); + } + + /** + * followed by how many literal words? + */ + static inline uword getNumberOfLiteralWords(uword data) { + return static_cast(data >> (1 + runninglengthbits)); + } + + /** + * running length of which type of bits + */ + void setRunningBit(bool b) { + if (b) + mydata |= static_cast(1); + else + mydata &= static_cast(~1); + } + + void discardFirstWords(uword x) { + const uword rl(getRunningLength()); + if (rl >= x) { + setRunningLength(rl - x); + return; + } + x -= rl; + setRunningLength(0); + setNumberOfLiteralWords(getNumberOfLiteralWords() - x); + } + + /** + * running length of which type of bits + */ + static inline void setRunningBit(uword &data, bool b) { + if (b) + data |= static_cast(1); + else + data &= static_cast(~1); + } + + void setRunningLength(uword l) { + mydata |= shiftedlargestrunninglengthcount; + mydata &= + static_cast((l << 1) | notshiftedlargestrunninglengthcount); + } + + // static call for people who hate objects + static inline void setRunningLength(uword &data, uword l) { + data |= shiftedlargestrunninglengthcount; + data &= static_cast((l << 1) | notshiftedlargestrunninglengthcount); + } + + void setNumberOfLiteralWords(uword l) { + mydata |= notrunninglengthplusrunningbit; + mydata &= static_cast((l << (runninglengthbits + 1)) | + runninglengthplusrunningbit); + } + // static call for 
people who hate objects + static inline void setNumberOfLiteralWords(uword &data, uword l) { + data |= notrunninglengthplusrunningbit; + data &= static_cast(l << (runninglengthbits + 1)) | + runninglengthplusrunningbit; + } + + static const uint32_t runninglengthbits = sizeof(uword) * 4; + static const uint32_t literalbits = sizeof(uword) * 8 - 1 - runninglengthbits; + static const uword largestliteralcount = + (static_cast(1) << literalbits) - 1; + static const uword largestrunninglengthcount = + (static_cast(1) << runninglengthbits) - 1; + static const uword shiftedlargestrunninglengthcount = + largestrunninglengthcount << 1; + static const uword notshiftedlargestrunninglengthcount = + static_cast(~shiftedlargestrunninglengthcount); + static const uword runninglengthplusrunningbit = + (static_cast(1) << (runninglengthbits + 1)) - 1; + static const uword notrunninglengthplusrunningbit = + static_cast(~runninglengthplusrunningbit); + static const uword notlargestrunninglengthcount = + static_cast(~largestrunninglengthcount); + + uword &mydata; }; /** * Same as RunningLengthWord, except that the values cannot be modified. */ -template -class ConstRunningLengthWord { +template class ConstRunningLengthWord { public: - - ConstRunningLengthWord() : - mydata(0) { - } - - ConstRunningLengthWord(const uword data) : - mydata(data) { - } - - ConstRunningLengthWord(const ConstRunningLengthWord & rlw) : - mydata(rlw.mydata) { - } - - /** - * Which bit is being repeated? - */ - bool getRunningBit() const { - return mydata & static_cast (1); - } - - /** - * how many words should be filled by the running bit - */ - uword getRunningLength() const { - return static_cast((mydata >> 1) - & RunningLengthWord::largestrunninglengthcount); - } - - /** - * followed by how many literal words? 
- */ - uword getNumberOfLiteralWords() const { - return static_cast (mydata >> (1 - + RunningLengthWord::runninglengthbits)); - } - - /** - * Total of getRunningLength() and getNumberOfLiteralWords() - */ - uword size() const { - return getRunningLength() + getNumberOfLiteralWords(); - } - - uword mydata; + ConstRunningLengthWord() : mydata(0) {} + + ConstRunningLengthWord(const uword data) : mydata(data) {} + + ConstRunningLengthWord(const ConstRunningLengthWord &rlw) + : mydata(rlw.mydata) {} + + /** + * Which bit is being repeated? + */ + bool getRunningBit() const { return mydata & static_cast(1); } + + /** + * how many words should be filled by the running bit + */ + uword getRunningLength() const { + return static_cast( + (mydata >> 1) & RunningLengthWord::largestrunninglengthcount); + } + + /** + * followed by how many literal words? + */ + uword getNumberOfLiteralWords() const { + return static_cast( + mydata >> (1 + RunningLengthWord::runninglengthbits)); + } + + /** + * Total of getRunningLength() and getNumberOfLiteralWords() + */ + uword size() const { return getRunningLength() + getNumberOfLiteralWords(); } + + uword mydata; }; -template -class EWAHBoolArray; +template class EWAHBoolArray; -template -class EWAHBoolArrayRawIterator; +template class EWAHBoolArrayRawIterator; /** * Same as RunningLengthWord, except that the values are buffered for quick * access. 
*/ -template -class BufferedRunningLengthWord { +template class BufferedRunningLengthWord { public: - BufferedRunningLengthWord(const uword & data, EWAHBoolArrayRawIterator * p) : - RunningBit(data & static_cast (1)), - RunningLength( - static_cast((data >> 1) - & RunningLengthWord::largestrunninglengthcount)), - NumberOfLiteralWords( - static_cast (data >> (1 + RunningLengthWord< - uword>::runninglengthbits))), parent(p) { - } - BufferedRunningLengthWord(const RunningLengthWord & p) : - RunningBit(p.mydata & static_cast (1)), - RunningLength( - (p.mydata >> 1) - & RunningLengthWord::largestrunninglengthcount), - NumberOfLiteralWords( - p.mydata >> (1 - + RunningLengthWord::runninglengthbits)), - parent(p.parent) { - } - - - void discharge(EWAHBoolArray &container) { - while (size() > 0) { - // first run - - size_t pl = getRunningLength(); - container.addStreamOfEmptyWords(getRunningBit(), pl); - size_t pd = getNumberOfLiteralWords(); - writeLiteralWords(pd, container); - discardFirstWordsWithReload(pl + pd); - } - } - - bool nonzero_discharge() { - while (size() > 0) { - // first run - size_t pl = getRunningLength(); - if((pl>0) && (getRunningBit())) return true; - size_t pd = getNumberOfLiteralWords(); - if(pd>0) return true; - discardFirstWordsWithReload(pl + pd); - } - return false; - } - - // Write out up to max words, returns how many were written - size_t discharge(EWAHBoolArray &container, size_t max) { - size_t index = 0; - while ((index < max) && (size() > 0)) { - // first run - size_t pl = getRunningLength(); - if (index + pl > max) { - pl = max - index; - } - container.addStreamOfEmptyWords(getRunningBit(), pl); - index += pl; - size_t pd = getNumberOfLiteralWords(); - if (pd + index > max) { - pd = max - index; - } - writeLiteralWords(pd, container); - index += pd; - discardFirstWordsWithReload(pl + pd); - } - return index; - } - - bool nonzero_discharge(size_t max, size_t & index) { - index = 0; - while ((index < max) && (size() > 0)) { - // first run 
- size_t pl = getRunningLength(); - if (index + pl > max) { - pl = max - index; - } - if((getRunningBit()) && (pl>0)) return true; - index += pl; - size_t pd = getNumberOfLiteralWords(); - if (pd + index > max) { - pd = max - index; - } - if(pd>0) return true; - discardFirstWordsWithReload(pl + pd); - } - return false; - } - - // Write out up to max words, returns how many were written - size_t dischargeNegated(EWAHBoolArray &container, size_t max) { - size_t index = 0; - while ((index < max) && (size() > 0)) { - // first run - size_t pl = getRunningLength(); - if (index + pl > max) { - pl = max - index; - } - container.addStreamOfEmptyWords(!getRunningBit(), pl); - index += pl; - size_t pd = getNumberOfLiteralWords(); - if (pd + index > max) { - pd = max - index; - } - writeNegatedLiteralWords(pd, container); - discardFirstWordsWithReload(pl + pd); - index += pd; - } - return index; - } - bool nonzero_dischargeNegated(size_t max, size_t & index) { - while ((index < max) && (size() > 0)) { - // first run - size_t pl = getRunningLength(); - if (index + pl > max) { - pl = max - index; - } - if((!getRunningBit()) && (pl>0)) return true; - index += pl; - size_t pd = getNumberOfLiteralWords(); - if (pd + index > max) { - pd = max - index; - } - if(pd>0) return true; - discardFirstWordsWithReload(pl + pd); - index += pd; - } - return false; - } - - uword getLiteralWordAt(size_t index) { - return parent->dirtyWords()[index]; - } - - - void writeLiteralWords(size_t numWords, EWAHBoolArray &container) { - container.addStreamOfDirtyWords(parent->dirtyWords(), numWords); - } - - - void writeNegatedLiteralWords(size_t numWords, EWAHBoolArray &container) { - container.addStreamOfNegatedDirtyWords(parent->dirtyWords(), numWords); - } - - void discardRunningWordsWithReload() { + enum { wordinbits = sizeof(uword) * 8 }; + + BufferedRunningLengthWord(const uword &data, + EWAHBoolArrayRawIterator *p) + : RunningBit(data & static_cast(1)), + RunningLength(static_cast( + (data >> 1) & 
RunningLengthWord::largestrunninglengthcount)), + NumberOfLiteralWords(static_cast( + data >> (1 + RunningLengthWord::runninglengthbits))), + parent(p) {} + BufferedRunningLengthWord(const RunningLengthWord &p) + : RunningBit(p.mydata & static_cast(1)), + RunningLength((p.mydata >> 1) & + RunningLengthWord::largestrunninglengthcount), + NumberOfLiteralWords(p.mydata >> + (1 + RunningLengthWord::runninglengthbits)), + parent(p.parent) {} + + void discharge(EWAHBoolArray &container) { + while (size() > 0) { + // first run + size_t pl = getRunningLength(); + container.fastaddStreamOfEmptyWords(getRunningBit(), pl); + size_t pd = getNumberOfLiteralWords(); + writeLiteralWords(pd, container); + if (!next()) + break; + } + } + + size_t dischargeCount() { + size_t answer = 0; + while (size() > 0) { + // first run + if (getRunningBit()) { + answer += wordinbits * getRunningLength(); + } + size_t pd = getNumberOfLiteralWords(); + for (size_t i = 0; i < pd; ++i) + answer += countOnes((uword)getLiteralWordAt(i)); + if (!next()) + break; + } + return answer; + } + + size_t dischargeCountNegated() { + size_t answer = 0; + while (size() > 0) { + // first run + if (!getRunningBit()) { + answer += wordinbits * getRunningLength(); + } + size_t pd = getNumberOfLiteralWords(); + for (size_t i = 0; i < pd; ++i) + answer += countOnes((uword)(~getLiteralWordAt(i))); + if (!next()) + break; + } + return answer; + } + + // Symbolically write out up to max words, returns how many were written, + // write to count the number bits written (we assume that count was initially + // zero) + size_t dischargeCount(size_t max, size_t *count) { + size_t index = 0; + while (true) { + if (index + RunningLength > max) { + const size_t offset = max - index; + if (getRunningBit()) + *count += offset * wordinbits; + RunningLength -= offset; + return max; + } + if (getRunningBit()) + *count += RunningLength * wordinbits; + index += RunningLength; + if (NumberOfLiteralWords + index > max) { + const size_t 
offset = max - index; + for (size_t i = 0; i < offset; ++i) + *count += countOnes((uword)getLiteralWordAt(i)); RunningLength = 0; - if(NumberOfLiteralWords == 0) - next(); - } - - bool next() { - if (!parent->hasNext()) { - NumberOfLiteralWords = 0; - RunningLength = 0; - return false; - } - parent->next(); + NumberOfLiteralWords -= offset; + return max; + } + for (size_t i = 0; i < NumberOfLiteralWords; ++i) + *count += countOnes((uword)getLiteralWordAt(i)); + index += NumberOfLiteralWords; + if (!next()) + break; + } + return index; + } + + size_t dischargeCountNegated(size_t max, size_t *count) { + size_t index = 0; + while (true) { + if (index + RunningLength > max) { + const size_t offset = max - index; + if (!getRunningBit()) + *count += offset * wordinbits; + RunningLength -= offset; + return max; + } + if (!getRunningBit()) + *count += RunningLength * wordinbits; + index += RunningLength; + if (NumberOfLiteralWords + index > max) { + const size_t offset = max - index; + for (size_t i = 0; i < offset; ++i) + *count += countOnes((uword)(~getLiteralWordAt(i))); + RunningLength = 0; + NumberOfLiteralWords -= offset; + return max; + } + for (size_t i = 0; i < NumberOfLiteralWords; ++i) + *count += countOnes((uword)(~getLiteralWordAt(i))); + index += NumberOfLiteralWords; + if (!next()) + break; + } + return index; + } + bool nonzero_discharge() { + while (size() > 0) { + // first run + size_t pl = getRunningLength(); + if ((pl > 0) && (getRunningBit())) return true; - } - - void read(const uword & data) { - RunningBit = data & static_cast (1); - RunningLength = static_cast((data >> 1) - & RunningLengthWord::largestrunninglengthcount); - NumberOfLiteralWords = static_cast (data >> (1 - + RunningLengthWord::runninglengthbits)); - } - - /** - * Which bit is being repeated? 
- */ - bool getRunningBit() const { - return RunningBit; - } - - void discardFirstWords(uword x) { - if (RunningLength >= x) { - RunningLength = static_cast (RunningLength - x); - return; - } - x = static_cast (x - RunningLength); + size_t pd = getNumberOfLiteralWords(); + if (pd > 0) + return true; + discardFirstWordsWithReload(pl + pd); + } + return false; + } + + // Write out up to max words, returns how many were written + size_t discharge(EWAHBoolArray &container, size_t max) { + size_t index = 0; + while (true) { + if (index + RunningLength > max) { + const size_t offset = max - index; + container.fastaddStreamOfEmptyWords(getRunningBit(), offset); + RunningLength -= offset; + return max; + } + container.fastaddStreamOfEmptyWords(getRunningBit(), RunningLength); + index += RunningLength; + if (NumberOfLiteralWords + index > max) { + const size_t offset = max - index; + writeLiteralWords(offset, container); RunningLength = 0; - NumberOfLiteralWords = static_cast (NumberOfLiteralWords - x); - } - - /** - * how many words should be filled by the running bit (see previous method) - */ - uword getRunningLength() const { - return RunningLength; - } - - /** - * followed by how many literal words? - */ - uword getNumberOfLiteralWords() const { - return NumberOfLiteralWords; - } - - /** - * Total of getRunningLength() and getNumberOfLiteralWords() - */ - uword size() const { - return static_cast (RunningLength + NumberOfLiteralWords); - } - - friend ostream& operator<< (ostream &out, const BufferedRunningLengthWord &a) { - out<<"{RunningBit:"< 0) { - if (RunningLength > x) { - RunningLength = static_cast(RunningLength - x); - return; - } - x = static_cast(x - RunningLength); - RunningLength = 0; - size_t toDiscard = - x > NumberOfLiteralWords ? 
NumberOfLiteralWords : x; - NumberOfLiteralWords = static_cast(NumberOfLiteralWords - - toDiscard); - x -= toDiscard; - if ((x > 0) || (size() == 0)) { - if (!next()) - break; - } - } - } - + NumberOfLiteralWords -= offset; + return max; + } + writeLiteralWords(NumberOfLiteralWords, container); + index += NumberOfLiteralWords; + if (!next()) + break; + } + return index; + } + + bool nonzero_discharge(size_t max, size_t &index) { + index = 0; + while ((index < max) && (size() > 0)) { + // first run + size_t pl = getRunningLength(); + if (index + pl > max) { + pl = max - index; + } + if ((getRunningBit()) && (pl > 0)) + return true; + index += pl; + size_t pd = getNumberOfLiteralWords(); + if (pd + index > max) { + pd = max - index; + } + if (pd > 0) + return true; + discardFirstWordsWithReload(pl + pd); + } + return false; + } + + // Write out up to max words, returns how many were written + size_t dischargeNegated(EWAHBoolArray &container, size_t max) { + // todo: could be optimized further + size_t index = 0; + while ((index < max) && (size() > 0)) { + // first run + size_t pl = getRunningLength(); + if (index + pl > max) { + pl = max - index; + } + container.fastaddStreamOfEmptyWords(!getRunningBit(), pl); + index += pl; + size_t pd = getNumberOfLiteralWords(); + if (pd + index > max) { + pd = max - index; + } + writeNegatedLiteralWords(pd, container); + discardFirstWordsWithReload(pl + pd); + index += pd; + } + return index; + } + bool nonzero_dischargeNegated(size_t max, size_t &index) { + while ((index < max) && (size() > 0)) { + // first run + size_t pl = getRunningLength(); + if (index + pl > max) { + pl = max - index; + } + if ((!getRunningBit()) && (pl > 0)) + return true; + index += pl; + size_t pd = getNumberOfLiteralWords(); + if (pd + index > max) { + pd = max - index; + } + if (pd > 0) + return true; + discardFirstWordsWithReload(pl + pd); + index += pd; + } + return false; + } + + uword getLiteralWordAt(size_t index) { return 
parent->dirtyWords()[index]; } + + void writeLiteralWords(size_t numWords, EWAHBoolArray &container) { + container.fastaddStreamOfDirtyWords(parent->dirtyWords(), numWords); + } + + void writeNegatedLiteralWords(size_t numWords, + EWAHBoolArray &container) { + container.addStreamOfNegatedDirtyWords(parent->dirtyWords(), numWords); + } + + void discardRunningWords() { RunningLength = 0; } + + void discardRunningWordsWithReload() { + RunningLength = 0; + if (NumberOfLiteralWords == 0) + next(); + } + + bool next() { + if (!parent->hasNext()) { + NumberOfLiteralWords = 0; + RunningLength = 0; + return false; + } + parent->next(); + return true; + } + + void read(const uword &data) { + RunningBit = data & static_cast(1); + RunningLength = static_cast( + (data >> 1) & RunningLengthWord::largestrunninglengthcount); + NumberOfLiteralWords = static_cast( + data >> (1 + RunningLengthWord::runninglengthbits)); + } + + /** + * Which bit is being repeated? + */ + bool getRunningBit() const { return RunningBit; } + + void discardFirstWords(uword x) { + if (RunningLength >= x) { + RunningLength = static_cast(RunningLength - x); + return; + } + x = static_cast(x - RunningLength); + RunningLength = 0; + NumberOfLiteralWords = static_cast(NumberOfLiteralWords - x); + } + + /** + * how many words should be filled by the running bit (see previous method) + */ + uword getRunningLength() const { return RunningLength; } + + /** + * followed by how many literal words? 
+ */ + uword getNumberOfLiteralWords() const { return NumberOfLiteralWords; } + + /** + * Total of getRunningLength() and getNumberOfLiteralWords() + */ + uword size() const { + return static_cast(RunningLength + NumberOfLiteralWords); + } + + friend std::ostream &operator<<(std::ostream &out, + const BufferedRunningLengthWord &a) { + out << "{RunningBit:" << a.RunningBit + << ",RunningLength:" << a.RunningLength + << ",NumberOfLiteralWords:" << a.NumberOfLiteralWords << "}"; + return out; + } + void discardLiteralWordsWithReload(uword x) { + assert(NumberOfLiteralWords >= x); + NumberOfLiteralWords -= x; + if (NumberOfLiteralWords == 0) + next(); + } + + void discardFirstWordsWithReload(uword x) { + while (x > 0) { + if (RunningLength > x) { + RunningLength = static_cast(RunningLength - x); + return; + } + x = static_cast(x - RunningLength); + RunningLength = 0; + size_t toDiscard = x > NumberOfLiteralWords ? NumberOfLiteralWords : x; + NumberOfLiteralWords = + static_cast(NumberOfLiteralWords - toDiscard); + x -= toDiscard; + if ((x > 0) || (size() == 0)) { + if (!next()) + break; + } + } + } private: - - bool RunningBit; - uword RunningLength; - uword NumberOfLiteralWords; - EWAHBoolArrayRawIterator * parent; - + bool RunningBit; + uword RunningLength; + uword NumberOfLiteralWords; + EWAHBoolArrayRawIterator *parent; }; - - #endif /* RUNNINGLENGTHWORD_H_ */ From 9e1f34e39219885c02785ce804886c2c6563bb03 Mon Sep 17 00:00:00 2001 From: John ZuHone Date: Thu, 28 May 2020 22:04:51 -0400 Subject: [PATCH 37/42] This fixes a compilation error --- yt/utilities/lib/ewah_bool_array.pxd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt/utilities/lib/ewah_bool_array.pxd b/yt/utilities/lib/ewah_bool_array.pxd index 9b55e626d9a..401a59a811d 100644 --- a/yt/utilities/lib/ewah_bool_array.pxd +++ b/yt/utilities/lib/ewah_bool_array.pxd @@ -12,7 +12,7 @@ from libcpp.vector cimport vector from libcpp.map cimport map from libcpp.string cimport string from 
libcpp cimport bool -from libc.stdint cimport uint64_t +from libc.stdint cimport uint64_t, uint32_t # Streams req for c++ IO cdef extern from "" namespace "std": @@ -89,7 +89,7 @@ cdef extern from "boolarray.h": uword getWord(size_t pos) size_t wordinbits -ctypedef np.uint32_t ewah_word_type +ctypedef uint32_t ewah_word_type ctypedef EWAHBoolArray[ewah_word_type] ewah_bool_array ctypedef EWAHBoolArraySetBitForwardIterator[ewah_word_type] ewah_bool_iterator ctypedef vector[size_t] bitset_array From bd1fb35fe29555979aec6fff1701b8792df0f827 Mon Sep 17 00:00:00 2001 From: John ZuHone Date: Fri, 29 May 2020 11:33:38 -0400 Subject: [PATCH 38/42] Make this platform-dependent --- yt/utilities/lib/ewah_bool_array.pxd | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt/utilities/lib/ewah_bool_array.pxd b/yt/utilities/lib/ewah_bool_array.pxd index 401a59a811d..4099e841b39 100644 --- a/yt/utilities/lib/ewah_bool_array.pxd +++ b/yt/utilities/lib/ewah_bool_array.pxd @@ -89,7 +89,10 @@ cdef extern from "boolarray.h": uword getWord(size_t pos) size_t wordinbits -ctypedef uint32_t ewah_word_type +IF UNAME_SYSNAME == "Windows": + ctypedef uint32_t ewah_word_type +ELSE: + ctypedef np.uint32_t ewah_word_type ctypedef EWAHBoolArray[ewah_word_type] ewah_bool_array ctypedef EWAHBoolArraySetBitForwardIterator[ewah_word_type] ewah_bool_iterator ctypedef vector[size_t] bitset_array From 25304b30ed01d1d3eb577528afacaab7a02019f1 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Fri, 29 May 2020 12:08:33 -0500 Subject: [PATCH 39/42] Update order-of-include and C++11 for particle_oct_container.pyx --- setup.py | 7 ++++--- yt/geometry/particle_oct_container.pyx | 23 +++++++++++------------ yt/utilities/lib/ewah_bool_array.pxd | 5 +++-- yt/utilities/lib/ewah_bool_wrap.pxd | 3 +-- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/setup.py b/setup.py index 4dfbf9e2639..9911b736093 100644 --- a/setup.py +++ b/setup.py @@ -106,7 +106,8 @@ def _compile( 
include_dirs=["yt/utilities/lib/", "yt/utilities/lib/ewahboolarray"], language="c++", - libraries=std_libs), + libraries=std_libs, + extra_compile_args=["-std=c++11"]), Extension("yt.geometry.selection_routines", ["yt/geometry/selection_routines.pyx"], include_dirs=["yt/utilities/lib/"], @@ -153,7 +154,7 @@ def _compile( ], libraries=std_libs, language="c++", - extra_compile_arg=["-std=c++03"]), + extra_compile_args=["-std=c++03"]), Extension("yt.utilities.lib.cykdtree.utils", [ "yt/utilities/lib/cykdtree/utils.pyx", @@ -162,7 +163,7 @@ def _compile( depends=["yt/utilities/lib/cykdtree/c_utils.hpp"], libraries=std_libs, language="c++", - extra_compile_arg=["-std=c++03"]), + extra_compile_args=["-std=c++03"]), Extension("yt.utilities.lib.fnv_hash", ["yt/utilities/lib/fnv_hash.pyx"], include_dirs=["yt/utilities/lib/"], diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index cfef5378d95..66e9d04558f 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -7,38 +7,37 @@ Oct container tuned for Particles """ +from libc.stdlib cimport malloc, free, qsort +from libc.string cimport memset +from libc.math cimport floor, ceil, fmod +from libcpp.map cimport map +from libcpp.vector cimport vector +from yt.utilities.lib.ewah_bool_array cimport \ + ewah_bool_array, ewah_bool_iterator, ewah_map, bool_array, ewah_word_type +import numpy as np +cimport numpy as np + from oct_container cimport OctreeContainer, Oct, OctInfo, ORDER_MAX, \ SparseOctreeContainer, OctKey, OctAllocationContainer cimport oct_visitors from oct_visitors cimport cind, OctVisitor -from libc.stdlib cimport malloc, free, qsort -from libc.string cimport memset -from libc.math cimport floor, ceil, fmod from yt.utilities.lib.fp_utils cimport * from yt.utilities.lib.geometry_utils cimport bounded_morton, \ bounded_morton_dds, bounded_morton_relative_dds, \ bounded_morton_split_dds, bounded_morton_split_relative_dds, \ 
encode_morton_64bit, decode_morton_64bit, \ morton_neighbors_coarse, morton_neighbors_refined -import numpy as np -cimport numpy as np from selection_routines cimport SelectorObject, AlwaysSelector cimport cython from cython cimport floating +from cython.operator cimport dereference, preincrement from cpython.exc cimport PyErr_CheckSignals from collections import defaultdict from yt.funcs import get_pbar from particle_deposit cimport gind -from yt.utilities.lib.ewah_bool_array cimport \ - ewah_bool_array, ewah_bool_iterator, ewah_map, bool_array, ewah_word_type #from yt.utilities.lib.ewah_bool_wrap cimport \ from ..utilities.lib.ewah_bool_wrap cimport BoolArrayCollection -from libcpp cimport bool -from libcpp.map cimport map -from libcpp.vector cimport vector -from libcpp.pair cimport pair -from cython.operator cimport dereference, preincrement import struct import os diff --git a/yt/utilities/lib/ewah_bool_array.pxd b/yt/utilities/lib/ewah_bool_array.pxd index 4099e841b39..b8507b71f1f 100644 --- a/yt/utilities/lib/ewah_bool_array.pxd +++ b/yt/utilities/lib/ewah_bool_array.pxd @@ -6,8 +6,6 @@ Wrapper for EWAH Bool Array: https://github.com/lemire/EWAHBoolArray """ -cimport numpy as np -cimport cython from libcpp.vector cimport vector from libcpp.map cimport map from libcpp.string cimport string @@ -89,6 +87,9 @@ cdef extern from "boolarray.h": uword getWord(size_t pos) size_t wordinbits +cimport numpy as np +cimport cython + IF UNAME_SYSNAME == "Windows": ctypedef uint32_t ewah_word_type ELSE: diff --git a/yt/utilities/lib/ewah_bool_wrap.pxd b/yt/utilities/lib/ewah_bool_wrap.pxd index 4feeaf31e4f..229b6536b4c 100644 --- a/yt/utilities/lib/ewah_bool_wrap.pxd +++ b/yt/utilities/lib/ewah_bool_wrap.pxd @@ -1,11 +1,10 @@ -cimport numpy as np from libcpp.vector cimport vector from libcpp.set cimport set as cset from libcpp.pair cimport pair - from yt.utilities.lib.ewah_bool_array cimport \ sstream, ewah_map, ewah_bool_array, ewah_bool_iterator +cimport numpy as np 
ctypedef bint bitarrtype ctypedef pair[np.uint64_t, np.uint64_t] ind_pair From e5847d804acfc23e7c6ce8d519c626d81168670a Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Fri, 29 May 2020 12:15:31 -0500 Subject: [PATCH 40/42] Update to Bionic as per Kacper's suggestion --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 56834d22208..e7cfad98563 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,5 @@ language: python -dist: xenial +dist: bionic cache: pip: true directories: From c85827ea9b3b3a957dc38e0cd4ff778b26e79bae Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Tue, 2 Jun 2020 16:57:12 -0500 Subject: [PATCH 41/42] Fix a handful of lint and style issues --- yt/geometry/particle_oct_container.pyx | 62 +++++++++----------------- 1 file changed, 20 insertions(+), 42 deletions(-) diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx index 66e9d04558f..85e19a08419 100644 --- a/yt/geometry/particle_oct_container.pyx +++ b/yt/geometry/particle_oct_container.pyx @@ -491,7 +491,7 @@ cdef class ParticleBitmap: np.uint64_t file_id) except *: # Initialize cdef np.int64_t i, p - cdef np.uint64_t mi, miex, mi_max + cdef np.uint64_t mi, miex cdef np.uint64_t mi_split[3] cdef np.float64_t ppos[3] cdef np.float64_t s_ppos[3] # shifted ppos @@ -511,7 +511,6 @@ cdef class ParticleBitmap: cdef np.uint64_t msize = (1 << (self.index_order1 * 3)) cdef int axiter[3][2] cdef np.float64_t axiterv[3][2] - mi_max = (1 << self.index_order1) - 1 # Copy over things for this file (type cast necessary?) 
for i in range(3): LE[i] = self.left_edge[i] @@ -619,12 +618,9 @@ cdef class ParticleBitmap: if in_collection is None: in_collection = BoolArrayCollection() cdef BoolArrayCollection _in_coll = in_collection - cdef np.int64_t nsub out_collection = self.__refined_index_data_file(_in_coll, pos, hsml, mask, - sub_mi1, sub_mi2, - file_id, &nsub, count_threshold, mask_threshold) - return nsub, out_collection + return 0, out_collection @cython.boundscheck(False) @cython.wraparound(False) @@ -636,9 +632,6 @@ cdef class ParticleBitmap: np.ndarray[floating, ndim=2] pos, np.ndarray[floating, ndim=1] hsml, np.ndarray[np.uint8_t, ndim=1] mask, - np.ndarray[np.uint64_t, ndim=1] sub_mi1, - np.ndarray[np.uint64_t, ndim=1] sub_mi2, - np.uint64_t file_id, np.int64_t *nsub_mi, np.uint64_t count_threshold, np.uint8_t mask_threshold ): # Initialize @@ -647,7 +640,7 @@ cdef class ParticleBitmap: cdef np.uint64_t mi1, mi2 cdef np.float64_t ppos[3] cdef np.float64_t s_ppos[3] # shifted ppos - cdef int skip, Nex + cdef int skip cdef BoolArrayCollection this_collection, out_collection cdef np.uint64_t bounds[2][3] cdef np.uint8_t fully_enclosed @@ -660,29 +653,16 @@ cdef class ParticleBitmap: cdef np.float64_t radius cdef np.uint64_t mi_split1[3] cdef np.uint64_t mi_split2[3] - cdef np.uint64_t miex1, miex2, mi1_max, mi2_max + cdef np.uint64_t miex1 cdef np.uint64_t[:] particle_counts = self.particle_counts - cdef int Nex_min[3] - cdef int Nex_max[3] - cdef np.float64_t rpos_min, rpos_max - cdef np.uint64_t xex2_min, xex2_max, yex2_min, yex2_max, zex2_min, zex2_max cdef np.uint64_t xex, yex, zex - cdef np.uint64_t xex1, yex1, zex1 - cdef np.uint64_t xex2, yex2, zex2 - cdef int ix, iy, iz, ixe, iye, ize - cdef np.ndarray[np.uint64_t, ndim=1] xex1_range = np.empty(7, 'uint64') - cdef np.ndarray[np.uint64_t, ndim=1] yex1_range = np.empty(7, 'uint64') - cdef np.ndarray[np.uint64_t, ndim=1] zex1_range = np.empty(7, 'uint64') - cdef np.ndarray[np.uint64_t, ndim=1] xex2_range = np.empty(7, 
'uint64') - cdef np.ndarray[np.uint64_t, ndim=1] yex2_range = np.empty(7, 'uint64') - cdef np.ndarray[np.uint64_t, ndim=1] zex2_range = np.empty(7, 'uint64') - cdef np.float64_t clip_pos_l[3], clip_pos_r[3] - cdef np.int64_t msize = sub_mi1.shape[0] + cdef np.float64_t clip_pos_l[3] + cdef np.float64_t clip_pos_r[3] cdef int axiter[3][2] cdef np.float64_t axiterv[3][2] cdef CoarseRefinedSets coarse_refined_map cdef map[np.uint64_t, np.uint64_t] refined_count - cdef np.uint64_t nset = 0, nfully_enclosed = 0, n_calls = 0 + cdef np.uint64_t nfully_enclosed = 0, n_calls = 0 mi1_max = (1 << self.index_order1) - 1 mi2_max = (1 << self.index_order2) - 1 cdef np.uint64_t max_mi1_elements = 1 << (3*self.index_order1) @@ -731,7 +711,7 @@ cdef class ParticleBitmap: ppos[0], ppos[1], ppos[2], LE, dds1, dds2, mi_split2) if refined_count[mi1] == 0: coarse_refined_map[mi1].padWithZeroes(max_mi2_elements) - if coarse_refined_map[mi1].get(mi2) == False: + if not coarse_refined_map[mi1].get(mi2): coarse_refined_map[mi1].set(mi2) refined_count[mi1] += 1 else: # only hit if we have smoothing lengths. 
@@ -803,11 +783,9 @@ cdef class ParticleBitmap: n_calls += 1 refined_count[miex1] += self.__fill_refined_ranges(s_ppos, radius, LE, RE, dds1, xex, yex, zex, - dds2, mi1_max, mi2_max, miex1, - coarse_refined_map[miex1], ppos, mask[miex1], - max_mi2_elements) - cdef np.uint64_t count, vec_i - cdef np.uint64_t total_count = 0 + dds2, + coarse_refined_map[miex1]) + cdef np.uint64_t vec_i cdef bool_array *buf = NULL cdef ewah_word_type w this_collection = BoolArrayCollection() @@ -832,20 +810,20 @@ cdef class ParticleBitmap: cdef np.int64_t __fill_refined_ranges(self, np.float64_t s_ppos[3], np.float64_t radius, np.float64_t LE[3], np.float64_t RE[3], np.float64_t dds1[3], np.uint64_t xex, np.uint64_t yex, np.uint64_t zex, - np.float64_t dds2[3], - np.uint64_t mi1_max, np.uint64_t mi2_max, np.uint64_t miex1, - bool_array &refined_set, np.float64_t ppos[3], np.uint64_t mcount, - np.uint64_t max_mi2_elements) except -1: + np.float64_t dds2[3], bool_array &refined_set) except -1: cdef int i cdef np.uint64_t new_nsub = 0 cdef np.uint64_t bounds_l[3], bounds_r[3] - cdef np.uint64_t miex2, mi2, miex2_min, miex2_max - cdef np.float64_t clip_pos_l[3], clip_pos_r[3], cell_edge_l, cell_edge_r - cdef np.uint64_t ex1[3], ex2[3], ex3[3] - cdef np.uint64_t xex_max, yex_max, zex_max + cdef np.uint64_t miex2, miex2_min, miex2_max + cdef np.float64_t clip_pos_l[3] + cdef np.float64_t clip_pos_r[3] + cdef np.float64_t cell_edge_l, cell_edge_r + cdef np.uint64_t ex1[3] cdef np.uint64_t xiex_min, yiex_min, ziex_min cdef np.uint64_t xiex_max, yiex_max, ziex_max - ex1[0] = xex; ex1[1] = yex; ex1[2] = zex + ex1[0] = xex + ex1[1] = yex + ex1[2] = zex # Check a few special cases for i in range(3): # Figure out our bounds inside our coarse cell, in the space of the From 59290f019f0c981d4850823d70236ac6e72c7b67 Mon Sep 17 00:00:00 2001 From: Matthew Turk Date: Wed, 3 Jun 2020 11:42:45 -0500 Subject: [PATCH 42/42] Updating to new answer-store rev --- answer-store | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/answer-store b/answer-store index 49870bcc8f4..d607a2e1a47 160000 --- a/answer-store +++ b/answer-store @@ -1 +1 @@ -Subproject commit 49870bcc8f4d32fcd6980a65239574f2cdd3b159 +Subproject commit d607a2e1a47947971e7e004e9bfd92664714b14b