Non-uniform processor allocation in domain-decomposed simulations #246

Draft · wants to merge 7 commits into dev
88 changes: 88 additions & 0 deletions mcdc/kernel.py
@@ -1968,6 +1968,38 @@ def score_surface_tally(P_arr, surface, tally, data, mcdc):
adapt.global_add(tally_bin, (TALLY_SCORE, idx + i), score)


@njit
def dd_reduce(data, mcdc):
tally_bin = data[TALLY]

# find number of subdomains
d_Nx = mcdc["technique"]["dd_mesh"]["x"].size - 1
d_Ny = mcdc["technique"]["dd_mesh"]["y"].size - 1
d_Nz = mcdc["technique"]["dd_mesh"]["z"].size - 1

with objmode():
# assign processors to their subdomain group
i = 0
for n in range(d_Nx * d_Ny * d_Nz):
            dd_ranks = []
            for r in range(int(mcdc["technique"]["dd_work_ratio"][n])):
                dd_ranks.append(i)
                i += 1
            # create MPI Comm group out of subdomain processors
            dd_group = MPI.COMM_WORLD.group.Incl(dd_ranks)
dd_comm = MPI.COMM_WORLD.Create(dd_group)
# MPI Reduce on subdomain processors
buff = np.zeros_like(tally_bin[TALLY_SCORE])
if MPI.COMM_NULL != dd_comm:
dd_comm.Reduce(tally_bin[TALLY_SCORE], buff, MPI.SUM, 0)
if mcdc["dd_idx"] == n:
tally_bin[TALLY_SCORE][:] = buff
# free comm group
dd_group.Free()
if MPI.COMM_NULL != dd_comm:
dd_comm.Free()


@njit
def tally_reduce(data, mcdc):
tally_bin = data[TALLY]
@@ -1985,6 +2017,16 @@ def tally_reduce(data, mcdc):
MPI.COMM_WORLD.Reduce(tally_bin[TALLY_SCORE], buff, MPI.SUM, 0)
tally_bin[TALLY_SCORE][:] = buff

else:
# find number of subdomains
N_dd = 1
N_dd *= mcdc["technique"]["dd_mesh"]["x"].size - 1
N_dd *= mcdc["technique"]["dd_mesh"]["y"].size - 1
N_dd *= mcdc["technique"]["dd_mesh"]["z"].size - 1
# DD Reduce if multiple processors per subdomain
if N_dd != mcdc["mpi_size"]:
dd_reduce(data, mcdc)


@njit
def tally_accumulate(data, mcdc):
@@ -2001,6 +2043,42 @@ def tally_accumulate(data, mcdc):
tally_bin[TALLY_SCORE, i] = 0.0


@njit
def dd_closeout(data, mcdc):
tally_bin = data[TALLY]

# find number of subdomains
d_Nx = mcdc["technique"]["dd_mesh"]["x"].size - 1
d_Ny = mcdc["technique"]["dd_mesh"]["y"].size - 1
d_Nz = mcdc["technique"]["dd_mesh"]["z"].size - 1

with objmode():
# assign processors to their subdomain group
i = 0
for n in range(d_Nx * d_Ny * d_Nz):
dd_ranks = []
for r in range(int(mcdc["technique"]["dd_work_ratio"][n])):
dd_ranks.append(i)
i += 1
# create MPI Comm group out of subdomain processors
dd_group = MPI.COMM_WORLD.group.Incl(dd_ranks)
dd_comm = MPI.COMM_WORLD.Create(dd_group)
# MPI Reduce on subdomain processors
buff = np.zeros_like(tally_bin[TALLY_SUM])
buff_sq = np.zeros_like(tally_bin[TALLY_SUM_SQ])
if MPI.COMM_NULL != dd_comm:
dd_comm.Reduce(tally_bin[TALLY_SUM], buff, MPI.SUM, 0)
dd_comm.Reduce(tally_bin[TALLY_SUM_SQ], buff_sq, MPI.SUM, 0)
if mcdc["dd_idx"] == n:
tally_bin[TALLY_SUM] = buff
tally_bin[TALLY_SUM_SQ] = buff_sq * len(dd_ranks)

# free comm group
dd_group.Free()
if MPI.COMM_NULL != dd_comm:
dd_comm.Free()


@njit
def tally_closeout(data, mcdc):
tally = data[TALLY]
@@ -2022,6 +2100,16 @@ def tally_closeout(data, mcdc):
tally[TALLY_SUM] = buff
tally[TALLY_SUM_SQ] = buff_sq

else:
# find number of subdomains
N_dd = 1
N_dd *= mcdc["technique"]["dd_mesh"]["x"].size - 1
N_dd *= mcdc["technique"]["dd_mesh"]["y"].size - 1
N_dd *= mcdc["technique"]["dd_mesh"]["z"].size - 1
# DD Reduce if multiple processors per subdomain
if N_dd != mcdc["mpi_size"]:
dd_closeout(data, mcdc)

# Calculate and store statistics
# sum --> mean
# sum_sq --> standard deviation
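For reference, the per-subdomain reduction added in dd_reduce and dd_closeout boils down to grouping consecutive MPI ranks by dd_work_ratio into one communicator per subdomain and summing tallies onto each group's root. Below is a minimal standalone mpi4py sketch of that pattern, not MC/DC code; work_ratio and local_tally here are illustrative placeholders:

# Standalone sketch: split COMM_WORLD into per-subdomain communicators and
# reduce a local tally onto each subdomain's root rank.
from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
size = comm.Get_size()

# assumed example: two subdomains, the first gets ceil(size/2) ranks, the second the rest
work_ratio = [(size + 1) // 2, size // 2]
local_tally = np.ones(10)  # placeholder for this rank's local scores

start = 0
for n, n_ranks in enumerate(work_ratio):
    ranks = list(range(start, start + n_ranks))  # consecutive ranks of subdomain n
    start += n_ranks
    group = comm.Get_group().Incl(ranks)
    sub_comm = comm.Create(group)  # COMM_NULL on ranks outside the group
    if sub_comm != MPI.COMM_NULL:
        total = np.zeros_like(local_tally)
        sub_comm.Reduce(local_tally, total, MPI.SUM, 0)  # sum onto the group root
        sub_comm.Free()
    group.Free()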
158 changes: 151 additions & 7 deletions mcdc/main.py
@@ -1291,10 +1291,45 @@ def dd_mergetally(mcdc, data):
ylen = len(mcdc["mesh_tallies"][0]["filter"]["y"]) - 1
zlen = len(mcdc["mesh_tallies"][0]["filter"]["z"]) - 1

dd_tally = np.zeros((tally.shape[0], tally.shape[1] * d_Nx * d_Ny * d_Nz))
# gather tallies
for i, t in enumerate(tally):
MPI.COMM_WORLD.Gather(tally[i], dd_tally[i], root=0)
# MPI gather
if (d_Nx * d_Ny * d_Nz) == MPI.COMM_WORLD.Get_size():
sendcounts = np.array(MPI.COMM_WORLD.gather(len(tally[0]), root=0))
if mcdc["mpi_master"]:
dd_tally = np.zeros((tally.shape[0], sum(sendcounts)))
else:
dd_tally = np.empty(tally.shape[0]) # dummy tally
# gather tallies
for i, t in enumerate(tally):
MPI.COMM_WORLD.Gatherv(
sendbuf=tally[i], recvbuf=(dd_tally[i], sendcounts), root=0
)

# MPI gather for multiprocessor subdomains
else:
i = 0
dd_ranks = []
# find nonzero tally processor IDs
for n in range(d_Nx * d_Ny * d_Nz):
dd_ranks.append(i)
i += int(mcdc["technique"]["dd_work_ratio"][n])
# create MPI comm group for nonzero tallies
dd_group = MPI.COMM_WORLD.group.Incl(dd_ranks)
dd_comm = MPI.COMM_WORLD.Create(dd_group)
dd_tally = np.empty(tally.shape[0]) # dummy tally

if MPI.COMM_NULL != dd_comm:
sendcounts = np.array(dd_comm.gather(len(tally[0]), root=0))
if mcdc["mpi_master"]:
dd_tally = np.zeros((tally.shape[0], sum(sendcounts)))
# gather tallies
for i, t in enumerate(tally):
dd_comm.Gatherv(tally[i], (dd_tally[i], sendcounts), root=0)
dd_group.Free()
if MPI.COMM_NULL != dd_comm:
dd_comm.Free()

if mcdc["mpi_master"]:
buff = np.zeros_like(dd_tally)
# reorganize tally data
@@ -1323,10 +1358,100 @@ def dd_mergetally(mcdc, data):
return dd_tally


def dd_mergemesh(mcdc, data):
"""
Performs mesh recombination on domain-decomposed mesh tallies.
Gathers and re-organizes mesh data into a single array as it
would appear in a non-decomposed simulation.
"""
d_Nx = input_deck.technique["dd_mesh"]["x"].size - 1
d_Ny = input_deck.technique["dd_mesh"]["y"].size - 1
d_Nz = input_deck.technique["dd_mesh"]["z"].size - 1
# gather mesh filter
if d_Nx > 1:
        sendcounts = np.array(
            MPI.COMM_WORLD.gather(
                len(mcdc["mesh_tallies"][0]["filter"]["x"]) - 1, root=0
            )
        )
        if mcdc["mpi_master"]:
            x_filter = np.zeros((mcdc["mesh_tallies"].shape[0], sum(sendcounts)))
        else:
            x_filter = np.empty((mcdc["mesh_tallies"].shape[0]))  # dummy tally
        # gather mesh, dropping each subdomain's leading edge to avoid duplicates
        for i in range(mcdc["mesh_tallies"].shape[0]):
            MPI.COMM_WORLD.Gatherv(
                sendbuf=mcdc["mesh_tallies"][i]["filter"]["x"][1:],
                recvbuf=(x_filter[i], sendcounts),
                root=0,
            )
if mcdc["mpi_master"]:
x_final = np.zeros((mcdc["mesh_tallies"].shape[0], x_filter.shape[1] + 1))
x_final[:, 0] = mcdc["mesh_tallies"][:]["filter"]["x"][0][0]
x_final[:, 1:] = x_filter

if d_Ny > 1:
sendcounts = np.array(
            MPI.COMM_WORLD.gather(
                len(mcdc["mesh_tallies"][0]["filter"]["y"]) - 1, root=0
            )
        )
        if mcdc["mpi_master"]:
            y_filter = np.zeros((mcdc["mesh_tallies"].shape[0], sum(sendcounts)))
        else:
            y_filter = np.empty((mcdc["mesh_tallies"].shape[0]))  # dummy tally
        # gather mesh, dropping each subdomain's leading edge to avoid duplicates
        for i in range(mcdc["mesh_tallies"].shape[0]):
            MPI.COMM_WORLD.Gatherv(
                sendbuf=mcdc["mesh_tallies"][i]["filter"]["y"][1:],
recvbuf=(y_filter[i], sendcounts),
root=0,
)
if mcdc["mpi_master"]:
y_final = np.zeros((mcdc["mesh_tallies"].shape[0], y_filter.shape[1] + 1))
y_final[:, 0] = mcdc["mesh_tallies"][:]["filter"]["y"][0][0]
y_final[:, 1:] = y_filter

if d_Nz > 1:
sendcounts = np.array(
MPI.COMM_WORLD.gather(
len(mcdc["mesh_tallies"][0]["filter"]["z"]) - 1, root=0
)
)
if mcdc["mpi_master"]:
z_filter = np.zeros((mcdc["mesh_tallies"].shape[0], sum(sendcounts)))
else:
z_filter = np.empty((mcdc["mesh_tallies"].shape[0])) # dummy tally
# gather mesh
for i in range(mcdc["mesh_tallies"].shape[0]):
MPI.COMM_WORLD.Gatherv(
sendbuf=mcdc["mesh_tallies"][i]["filter"]["z"][1:],
recvbuf=(z_filter[i], sendcounts),
root=0,
)
if mcdc["mpi_master"]:
z_final = np.zeros((mcdc["mesh_tallies"].shape[0], z_filter.shape[1] + 1))
z_final[:, 0] = mcdc["mesh_tallies"][:]["filter"]["z"][0][0]
z_final[:, 1:] = z_filter

dd_mesh = []
if mcdc["mpi_master"]:
if d_Nx > 1:
dd_mesh.append(x_final)
else:
dd_mesh.append(mcdc["mesh_tallies"][:]["filter"]["x"])
if d_Ny > 1:
dd_mesh.append(y_final)
else:
dd_mesh.append(mcdc["mesh_tallies"][:]["filter"]["y"])
if d_Nz > 1:
dd_mesh.append(z_final)
else:
            dd_mesh.append(mcdc["mesh_tallies"][:]["filter"]["z"])
return dd_mesh


def generate_hdf5(data, mcdc):

if mcdc["technique"]["domain_decomposition"]:
dd_tally = dd_mergetally(mcdc, data)
dd_mesh = dd_mergemesh(mcdc, data)

if mcdc["mpi_master"]:
if mcdc["setting"]["progress_bar"]:
@@ -1364,15 +1489,34 @@ def generate_hdf5(data, mcdc):

mesh = tally["filter"]
f.create_dataset("tallies/mesh_tally_%i/grid/t" % ID, data=mesh["t"])
f.create_dataset("tallies/mesh_tally_%i/grid/x" % ID, data=mesh["x"])
f.create_dataset("tallies/mesh_tally_%i/grid/y" % ID, data=mesh["y"])
f.create_dataset("tallies/mesh_tally_%i/grid/z" % ID, data=mesh["z"])
f.create_dataset("tallies/mesh_tally_%i/grid/mu" % ID, data=mesh["mu"])
f.create_dataset(
"tallies/mesh_tally_%i/grid/azi" % ID, data=mesh["azi"]
)
f.create_dataset("tallies/mesh_tally_%i/grid/g" % ID, data=mesh["g"])

if not mcdc["technique"]["domain_decomposition"]:
f.create_dataset(
"tallies/mesh_tally_%i/grid/x" % ID, data=mesh["x"]
)
f.create_dataset(
"tallies/mesh_tally_%i/grid/y" % ID, data=mesh["y"]
)
f.create_dataset(
"tallies/mesh_tally_%i/grid/z" % ID, data=mesh["z"]
)
else:
# substitute recomposed mesh
f.create_dataset(
"tallies/mesh_tally_%i/grid/x" % ID, data=dd_mesh[0][ID]
)
f.create_dataset(
"tallies/mesh_tally_%i/grid/y" % ID, data=dd_mesh[1][ID]
)
f.create_dataset(
"tallies/mesh_tally_%i/grid/z" % ID, data=dd_mesh[2][ID]
)

# Shape
Nmu = len(mesh["mu"]) - 1
N_azi = len(mesh["azi"]) - 1
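For reference, dd_mergetally and dd_mergemesh above rely on the same collective pattern: each rank contributes a piece of a different length, so the per-rank lengths are gathered first and Gatherv then concatenates the pieces on the root in rank order. Below is a minimal standalone mpi4py sketch of that pattern, not MC/DC code; local is an illustrative placeholder:

# Standalone sketch: variable-count gather of per-rank arrays onto rank 0.
from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

local = np.full(3 + rank, float(rank))  # ranks own pieces of different sizes

counts = comm.gather(len(local), root=0)  # collect per-rank lengths on the root
if rank == 0:
    sendcounts = np.array(counts)
    merged = np.zeros(sendcounts.sum())
    recvbuf = (merged, sendcounts)  # receive buffer plus per-rank counts
else:
    recvbuf = None  # recvbuf is only used on the root

comm.Gatherv(sendbuf=local, recvbuf=recvbuf, root=0)
if rank == 0:
    print(merged)  # pieces concatenated in rank order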
Binary file modified test/regression/dd_slab_reed/answer.h5
3 changes: 2 additions & 1 deletion test/regression/dd_slab_reed/input.py
@@ -46,6 +46,7 @@

# Setting
mcdc.setting(N_particle=5000)
mcdc.domain_decomposition(z=np.linspace(0.0, 8.0, 5))
dd_z = np.array([0.0, 2.0, 3.0, 5.0, 8.0])
mcdc.domain_decomposition(z=dd_z)
# Run
mcdc.run()
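With this change the regression test uses a non-uniform z-decomposition. To actually exercise a non-uniform processor allocation, the decomposition would also be given a per-subdomain work ratio; the snippet below is only a hypothetical illustration of that input, and the work_ratio argument name is an assumption, not something confirmed by this diff:

# Hypothetical input sketch extending the deck above (the `work_ratio`
# argument name is assumed): four z-subdomains of unequal width, with the
# two wider outer subdomains each assigned twice as many MPI ranks.
import numpy as np
import mcdc

dd_z = np.array([0.0, 2.0, 3.0, 5.0, 8.0])
mcdc.domain_decomposition(z=dd_z, work_ratio=[2, 1, 1, 2])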