From 28fb30cf2f2d8f3a3a3bba2619d17476a200ec8c Mon Sep 17 00:00:00 2001 From: alexandermote Date: Thu, 10 Oct 2024 16:35:13 -0700 Subject: [PATCH 1/6] Modified dd_slab_reed DD mesh and revised DD tally gather --- mcdc/kernel.py | 84 +++++++++++++++++++++++++++ mcdc/main.py | 43 ++++++++++++-- test/regression/dd_slab_reed/input.py | 3 +- 3 files changed, 125 insertions(+), 5 deletions(-) diff --git a/mcdc/kernel.py b/mcdc/kernel.py index 7dcd817d..4eacdc75 100644 --- a/mcdc/kernel.py +++ b/mcdc/kernel.py @@ -1968,6 +1968,38 @@ def score_surface_tally(P_arr, surface, tally, data, mcdc): adapt.global_add(tally_bin, (TALLY_SCORE, idx + i), score) +@njit +def dd_reduce(data, mcdc): + tally_bin = data[TALLY] + + # find number of subdomains + d_Nx = mcdc["technique"]["dd_mesh"]["x"].size - 1 + d_Ny = mcdc["technique"]["dd_mesh"]["y"].size - 1 + d_Nz = mcdc["technique"]["dd_mesh"]["z"].size - 1 + + # assign processors to their subdomain group + i = 0 + for n in range(d_Nx * d_Ny * d_Nz): + dd_group = [] + for r in range(int(mcdc["technique"]["dd_work_ratio"][n])): + dd_group.append(i) + i += 1 + # create MPI Comm group out of subdomain processors + dd_group = MPI.COMM_WORLD.group.Incl(dd_group) + dd_comm = MPI.COMM_WORLD.Create(dd_group) + # MPI Reduce on subdomain processors + buff = np.zeros_like(tally_bin[TALLY_SCORE]) + if MPI.COMM_NULL != dd_comm: + with objmode(): + dd_comm.Reduce(tally_bin[TALLY_SCORE], buff, MPI.SUM, 0) + if mcdc["dd_idx"] == n: + tally_bin[TALLY_SCORE][:] = buff + # free comm group + dd_group.Free() + if MPI.COMM_NULL != dd_comm: + dd_comm.Free() + + @njit def tally_reduce(data, mcdc): tally_bin = data[TALLY] @@ -1985,6 +2017,15 @@ def tally_reduce(data, mcdc): MPI.COMM_WORLD.Reduce(tally_bin[TALLY_SCORE], buff, MPI.SUM, 0) tally_bin[TALLY_SCORE][:] = buff + else: + # find number of subdomains + N_dd = 1 + for name in ["x", "y", "z", "t", "mu", "azi"]: + N_dd *= mcdc["technique"]["dd_mesh"][name].size - 1 + # DD Reduce if multiple processors per subdomain + if N_dd != MPI.COMM_WORLD.Get_size(): + dd_reduce(data, mcdc) + @njit def tally_accumulate(data, mcdc): @@ -2000,6 +2041,40 @@ def tally_accumulate(data, mcdc): # Reset score bin tally_bin[TALLY_SCORE, i] = 0.0 +@njit +def dd_closeout(data, mcdc): + tally_bin = data[TALLY] + + # find number of subdomains + d_Nx = mcdc["technique"]["dd_mesh"]["x"].size - 1 + d_Ny = mcdc["technique"]["dd_mesh"]["y"].size - 1 + d_Nz = mcdc["technique"]["dd_mesh"]["z"].size - 1 + + # assign processors to their subdomain group + i = 0 + for n in range(d_Nx * d_Ny * d_Nz): + dd_ranks = [] + for r in range(int(mcdc["technique"]["dd_work_ratio"][n])): + dd_ranks.append(i) + i += 1 + # create MPI Comm group out of subdomain processors + dd_group = MPI.COMM_WORLD.group.Incl(dd_ranks) + dd_comm = MPI.COMM_WORLD.Create(dd_group) + # MPI Reduce on subdomain processors + buff = np.zeros_like(tally_bin[TALLY_SUM]) + buff_sq = np.zeros_like(tally_bin[TALLY_SUM_SQ]) + if MPI.COMM_NULL != dd_comm: + with objmode(): + dd_comm.Reduce(tally_bin[TALLY_SUM], buff, MPI.SUM, 0) + dd_comm.Reduce(tally_bin[TALLY_SUM_SQ], buff_sq, MPI.SUM, 0) + if mcdc["dd_idx"] == n: + tally_bin[TALLY_SUM] = buff + tally_bin[TALLY_SUM_SQ] = buff_sq * len(dd_ranks) + + # free comm group + dd_group.Free() + if MPI.COMM_NULL != dd_comm: + dd_comm.Free() @njit def tally_closeout(data, mcdc): @@ -2022,6 +2097,15 @@ def tally_closeout(data, mcdc): tally[TALLY_SUM] = buff tally[TALLY_SUM_SQ] = buff_sq + else: + # find number of subdomains + N_dd = 1 + for name in ["x", "y", "z", "t", 
"mu", "azi"]: + N_dd *= mcdc["technique"]["dd_mesh"][name].size - 1 + # DD Reduce if multiple processors per subdomain + if N_dd != MPI.COMM_WORLD.Get_size(): + dd_closeout(data, mcdc) + # Calculate and store statistics # sum --> mean # sum_sq --> standard deviation diff --git a/mcdc/main.py b/mcdc/main.py index 1ee0a950..8e1756b3 100644 --- a/mcdc/main.py +++ b/mcdc/main.py @@ -1291,10 +1291,45 @@ def dd_mergetally(mcdc, data): ylen = len(mcdc["mesh_tallies"][0]["filter"]["y"]) - 1 zlen = len(mcdc["mesh_tallies"][0]["filter"]["z"]) - 1 - dd_tally = np.zeros((tally.shape[0], tally.shape[1] * d_Nx * d_Ny * d_Nz)) - # gather tallies - for i, t in enumerate(tally): - MPI.COMM_WORLD.Gather(tally[i], dd_tally[i], root=0) + # MPI gather + if (d_Nx * d_Ny * d_Nz) == MPI.COMM_WORLD.Get_size(): + sendcounts = np.array(MPI.COMM_WORLD.gather(len(tally[0]),root=0)) + #print(MPI.COMM_WORLD.Get_rank(), tally) + if mcdc["mpi_master"]: + dd_tally = np.zeros((tally.shape[0], sum(sendcounts))) + else: + dd_tally = np.empty(tally.shape[0]) # dummy tally + # gather tallies + for i, t in enumerate(tally): + MPI.COMM_WORLD.Gatherv(sendbuf=tally[i], recvbuf=(dd_tally[i], sendcounts), root=0) + #print(dd_tally) + + # MPI gather for multiprocessor subdomains + else: + i = 0 + dd_ranks = [] + # find nonzero tally processor IDs + for n in range(d_Nx * d_Ny * d_Nz): + dd_ranks.append(i) + i += int(mcdc["technique"]["dd_work_ratio"][n]) + # create MPI comm group for nonzero tallies + dd_group = MPI.COMM_WORLD.group.Incl(dd_ranks) + dd_comm = MPI.COMM_WORLD.Create(dd_group) + dd_tally = np.empty(tally.shape[0]) # dummy tally + + if MPI.COMM_NULL != dd_comm: + sendcounts = np.array(dd_comm.gather(len(tally[0]),root=0)) + #print(MPI.COMM_WORLD.Get_rank(), tally) + if mcdc["mpi_master"]: + dd_tally = np.zeros((tally.shape[0], sum(sendcounts))) + # gather tallies + for i, t in enumerate(tally): + dd_comm.Gatherv(tally[i], (dd_tally[i], sendcounts), root=0) + dd_group.Free() + if MPI.COMM_NULL != dd_comm: + dd_comm.Free() + #print(dd_tally) + if mcdc["mpi_master"]: buff = np.zeros_like(dd_tally) # reorganize tally data diff --git a/test/regression/dd_slab_reed/input.py b/test/regression/dd_slab_reed/input.py index bf4826ac..19e0c405 100644 --- a/test/regression/dd_slab_reed/input.py +++ b/test/regression/dd_slab_reed/input.py @@ -46,6 +46,7 @@ # Setting mcdc.setting(N_particle=5000) -mcdc.domain_decomposition(z=np.linspace(0.0, 8.0, 5)) +dd_z = np.array([0.0,2.0,3.0,5.0,8.0]) +mcdc.domain_decomposition(z=dd_z) # Run mcdc.run() From d486be1482bdc75d9ed919596dcd47415d759187 Mon Sep 17 00:00:00 2001 From: alexandermote Date: Fri, 11 Oct 2024 12:17:59 -0700 Subject: [PATCH 2/6] Updated dd_slab_reed answer file --- test/regression/dd_slab_reed/answer.h5 | Bin 213312 -> 214184 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/test/regression/dd_slab_reed/answer.h5 b/test/regression/dd_slab_reed/answer.h5 index b38e7dc66f030a18a5a924c48f1504741e79b41b..fc0b74559744a3d5953a1224619fa1dea1bb714e 100644 GIT binary patch delta 4127 zcmai13s_Xu7H02_bcP8W-iQK&*I+fj>6w-g~XR*IIjx zG0(1alU=?BmyXv5ykI?UV<@o_KS=tuvMM+8O(W%TwR!xi2}{VL*W@b8yWA`HTi>zq zw>Il#EXzm}p~lSHm}z>-lI6=XtgQU`JxVtez}G1Kkh=1w((T!|Rqley9R=3giEF4h zd4eB|ZAI@v*HLT(ms^o9WVRwT=vt8-+)c+x!j^03`X*)hS=p+hGR#j&SSVae#p@Kx ze0k8S>okVdUc^(#&A{pE0HfJF2o#ph$KlqTvQN0WQ5-tf!uTW zbs>?*!Guo?9YrEr5x(uC1EJO|@9TZJE;usy=>Sip7*D6xPlGawDENI3Th+aYp= zZ8~NEt_0PWY-Ou94D-IhAK{e-`0*xwi(l2xRhfB1_ATFp`MQAlq?x(mIkb+CDpL!Z 
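Note on the gather pattern: the revised dd_mergetally in PATCH 1/6 replaces the fixed-size Gather with Gatherv because, after domain decomposition, ranks can hold tally slices of different lengths; the root first gathers the per-rank lengths and then receives the concatenated data. A minimal standalone sketch of that idiom follows, with hypothetical array sizes rather than MC/DC tally data.

# Minimal sketch (not MC/DC code): variable-length gather onto rank 0.
# Run under mpiexec with any number of ranks.
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

local = np.full(4 + rank, float(rank))  # hypothetical local tally slice

# Rank 0 first learns how many values each rank will send...
sendcounts = comm.gather(len(local), root=0)

if rank == 0:
    merged = np.zeros(sum(sendcounts))
    recvbuf = (merged, sendcounts)
else:
    recvbuf = None  # only the root needs a receive buffer

# ...then receives every slice, concatenated in rank order, into merged.
comm.Gatherv(sendbuf=local, recvbuf=recvbuf, root=0)

On rank 0, merged holds each rank's slice in rank order, which is the layout dd_mergetally then reorganizes into the global tally arrangement.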
z(p;{$3UiAzPqSLj1Fkqot!fh46pAShXqgtMGX5qp=;$3tr}ckG)3!e$2{;4i0UL=r z4J%M$z2Izi1cRDRiEcNA(mS~6C-FK-!whhekJ?~QPVG>_(;~5UKZ6u!C|oGq|)a9u*C|=N6O;Cffnju+m9R{MSzeBQn2@LXVZ;}w# zFfhndBefRk{@&?U_vwIW#g~?Mcxyot*)-#8OK;k=oDcE~NiR$s4Gb-N@pLFbCN9yapj<{4c>Z zbpIax0#^~(05?I4jnI`joUwpaGEMTn3ymBx{|dj7Sl{KGBN@V8h78CgTQ0kFv^9#V zi^CaUBFU62gd**-NKBi^AoDMR4T79pxW&d(owTbcZ~?0ed~f5UuD}sMX}%_@r+Lu| zI8)4gbeQhG&`~_O0X6IcSV}WIm^O&QsfsU^PgMe??x_Mt zNn15+;P0M*8vvQ2qHnlLFe=g4HC4w$-4F92N#A z86`B?EbTps*%O^b^9A)G!U?CX^I|GQ3iLbFkb^PD03Y844Mm*kNP_a+QYlUhH#^Ob zGak>h*gK20l@_}kTI}eyoS{7|(2*QB6sAg(xE00TQpfL&7(y}2u`e^c$p_^FbXCs648GqN7S5`4sqYmlh>3awvASclu}|FTX;vd!ZeCW(evAxMl9Xs%PBe1{L&H zRz*o}s(H&p&;`P@NRkkRs^GtP(lHv9pt4BOCh;KjK~`%gy2nvyK5!$dXtZ70?`J-d z1)0d%4_SA2f~08xV&G~$xjq0@AvP9qf%gi?%oyZ}bVgCCaMD;TK*BcDf;2#cXc_GB@cdbpvNln2)rvXkYYjdZX)N`5p%JCSVgnyBkyu?#9L?9lv-S^*+8C-Mrko g#NB8tjCfec?7+`|VO#8aQBs^N2^pzbj|Xi1A9IRs?EnA( From 8c60903ed2a91486bfa6d488ed5142190b848006 Mon Sep 17 00:00:00 2001 From: Ilham Variansyah Date: Wed, 16 Oct 2024 04:05:54 +0700 Subject: [PATCH 3/6] fixing some numba errors --- mcdc/kernel.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/mcdc/kernel.py b/mcdc/kernel.py index 4eacdc75..c6fd4e08 100644 --- a/mcdc/kernel.py +++ b/mcdc/kernel.py @@ -1998,7 +1998,7 @@ def dd_reduce(data, mcdc): dd_group.Free() if MPI.COMM_NULL != dd_comm: dd_comm.Free() - + @njit def tally_reduce(data, mcdc): @@ -2020,10 +2020,11 @@ def tally_reduce(data, mcdc): else: # find number of subdomains N_dd = 1 - for name in ["x", "y", "z", "t", "mu", "azi"]: - N_dd *= mcdc["technique"]["dd_mesh"][name].size - 1 + N_dd *= mcdc["technique"]["dd_mesh"]["x"].size - 1 + N_dd *= mcdc["technique"]["dd_mesh"]["y"].size - 1 + N_dd *= mcdc["technique"]["dd_mesh"]["z"].size - 1 # DD Reduce if multiple processors per subdomain - if N_dd != MPI.COMM_WORLD.Get_size(): + if N_dd != mcdc["mpi_size"]: dd_reduce(data, mcdc) @@ -2041,6 +2042,7 @@ def tally_accumulate(data, mcdc): # Reset score bin tally_bin[TALLY_SCORE, i] = 0.0 + @njit def dd_closeout(data, mcdc): tally_bin = data[TALLY] @@ -2070,12 +2072,13 @@ def dd_closeout(data, mcdc): if mcdc["dd_idx"] == n: tally_bin[TALLY_SUM] = buff tally_bin[TALLY_SUM_SQ] = buff_sq * len(dd_ranks) - + # free comm group dd_group.Free() if MPI.COMM_NULL != dd_comm: dd_comm.Free() + @njit def tally_closeout(data, mcdc): tally = data[TALLY] From 518090d0d07c241d0f0d256da91405fed30fe4a0 Mon Sep 17 00:00:00 2001 From: alexandermote Date: Tue, 15 Oct 2024 15:07:08 -0700 Subject: [PATCH 4/6] numba fix + back in black --- mcdc/kernel.py | 104 +++++++++++++------------- mcdc/main.py | 22 +++--- test/regression/dd_slab_reed/input.py | 2 +- 3 files changed, 66 insertions(+), 62 deletions(-) diff --git a/mcdc/kernel.py b/mcdc/kernel.py index 4eacdc75..4812b577 100644 --- a/mcdc/kernel.py +++ b/mcdc/kernel.py @@ -1977,28 +1977,28 @@ def dd_reduce(data, mcdc): d_Ny = mcdc["technique"]["dd_mesh"]["y"].size - 1 d_Nz = mcdc["technique"]["dd_mesh"]["z"].size - 1 - # assign processors to their subdomain group - i = 0 - for n in range(d_Nx * d_Ny * d_Nz): - dd_group = [] - for r in range(int(mcdc["technique"]["dd_work_ratio"][n])): - dd_group.append(i) - i += 1 - # create MPI Comm group out of subdomain processors - dd_group = MPI.COMM_WORLD.group.Incl(dd_group) - dd_comm = MPI.COMM_WORLD.Create(dd_group) - # MPI Reduce on subdomain processors - buff = np.zeros_like(tally_bin[TALLY_SCORE]) - if MPI.COMM_NULL != dd_comm: - with objmode(): + with objmode(): + # assign processors to their subdomain 
group + i = 0 + for n in range(d_Nx * d_Ny * d_Nz): + dd_group = [] + for r in range(int(mcdc["technique"]["dd_work_ratio"][n])): + dd_group.append(i) + i += 1 + # create MPI Comm group out of subdomain processors + dd_group = MPI.COMM_WORLD.group.Incl(dd_group) + dd_comm = MPI.COMM_WORLD.Create(dd_group) + # MPI Reduce on subdomain processors + buff = np.zeros_like(tally_bin[TALLY_SCORE]) + if MPI.COMM_NULL != dd_comm: dd_comm.Reduce(tally_bin[TALLY_SCORE], buff, MPI.SUM, 0) - if mcdc["dd_idx"] == n: - tally_bin[TALLY_SCORE][:] = buff - # free comm group - dd_group.Free() - if MPI.COMM_NULL != dd_comm: - dd_comm.Free() - + if mcdc["dd_idx"] == n: + tally_bin[TALLY_SCORE][:] = buff + # free comm group + dd_group.Free() + if MPI.COMM_NULL != dd_comm: + dd_comm.Free() + @njit def tally_reduce(data, mcdc): @@ -2020,10 +2020,11 @@ def tally_reduce(data, mcdc): else: # find number of subdomains N_dd = 1 - for name in ["x", "y", "z", "t", "mu", "azi"]: - N_dd *= mcdc["technique"]["dd_mesh"][name].size - 1 + N_dd *= mcdc["technique"]["dd_mesh"]["x"].size - 1 + N_dd *= mcdc["technique"]["dd_mesh"]["y"].size - 1 + N_dd *= mcdc["technique"]["dd_mesh"]["z"].size - 1 # DD Reduce if multiple processors per subdomain - if N_dd != MPI.COMM_WORLD.Get_size(): + if N_dd != mcdc["mpi_size"]: dd_reduce(data, mcdc) @@ -2041,6 +2042,7 @@ def tally_accumulate(data, mcdc): # Reset score bin tally_bin[TALLY_SCORE, i] = 0.0 + @njit def dd_closeout(data, mcdc): tally_bin = data[TALLY] @@ -2050,31 +2052,32 @@ def dd_closeout(data, mcdc): d_Ny = mcdc["technique"]["dd_mesh"]["y"].size - 1 d_Nz = mcdc["technique"]["dd_mesh"]["z"].size - 1 - # assign processors to their subdomain group - i = 0 - for n in range(d_Nx * d_Ny * d_Nz): - dd_ranks = [] - for r in range(int(mcdc["technique"]["dd_work_ratio"][n])): - dd_ranks.append(i) - i += 1 - # create MPI Comm group out of subdomain processors - dd_group = MPI.COMM_WORLD.group.Incl(dd_ranks) - dd_comm = MPI.COMM_WORLD.Create(dd_group) - # MPI Reduce on subdomain processors - buff = np.zeros_like(tally_bin[TALLY_SUM]) - buff_sq = np.zeros_like(tally_bin[TALLY_SUM_SQ]) - if MPI.COMM_NULL != dd_comm: - with objmode(): + with objmode(): + # assign processors to their subdomain group + i = 0 + for n in range(d_Nx * d_Ny * d_Nz): + dd_ranks = [] + for r in range(int(mcdc["technique"]["dd_work_ratio"][n])): + dd_ranks.append(i) + i += 1 + # create MPI Comm group out of subdomain processors + dd_group = MPI.COMM_WORLD.group.Incl(dd_ranks) + dd_comm = MPI.COMM_WORLD.Create(dd_group) + # MPI Reduce on subdomain processors + buff = np.zeros_like(tally_bin[TALLY_SUM]) + buff_sq = np.zeros_like(tally_bin[TALLY_SUM_SQ]) + if MPI.COMM_NULL != dd_comm: dd_comm.Reduce(tally_bin[TALLY_SUM], buff, MPI.SUM, 0) dd_comm.Reduce(tally_bin[TALLY_SUM_SQ], buff_sq, MPI.SUM, 0) - if mcdc["dd_idx"] == n: - tally_bin[TALLY_SUM] = buff - tally_bin[TALLY_SUM_SQ] = buff_sq * len(dd_ranks) - - # free comm group - dd_group.Free() - if MPI.COMM_NULL != dd_comm: - dd_comm.Free() + if mcdc["dd_idx"] == n: + tally_bin[TALLY_SUM] = buff + tally_bin[TALLY_SUM_SQ] = buff_sq * len(dd_ranks) + + # free comm group + dd_group.Free() + if MPI.COMM_NULL != dd_comm: + dd_comm.Free() + @njit def tally_closeout(data, mcdc): @@ -2100,10 +2103,11 @@ def tally_closeout(data, mcdc): else: # find number of subdomains N_dd = 1 - for name in ["x", "y", "z", "t", "mu", "azi"]: - N_dd *= mcdc["technique"]["dd_mesh"][name].size - 1 + N_dd *= mcdc["technique"]["dd_mesh"]["x"].size - 1 + N_dd *= mcdc["technique"]["dd_mesh"]["y"].size - 
1 + N_dd *= mcdc["technique"]["dd_mesh"]["z"].size - 1 # DD Reduce if multiple processors per subdomain - if N_dd != MPI.COMM_WORLD.Get_size(): + if N_dd != mcdc["mpi_size"]: dd_closeout(data, mcdc) # Calculate and store statistics diff --git a/mcdc/main.py b/mcdc/main.py index 8e1756b3..cba69da0 100644 --- a/mcdc/main.py +++ b/mcdc/main.py @@ -1293,17 +1293,19 @@ def dd_mergetally(mcdc, data): # MPI gather if (d_Nx * d_Ny * d_Nz) == MPI.COMM_WORLD.Get_size(): - sendcounts = np.array(MPI.COMM_WORLD.gather(len(tally[0]),root=0)) - #print(MPI.COMM_WORLD.Get_rank(), tally) + sendcounts = np.array(MPI.COMM_WORLD.gather(len(tally[0]), root=0)) + # print(MPI.COMM_WORLD.Get_rank(), tally) if mcdc["mpi_master"]: dd_tally = np.zeros((tally.shape[0], sum(sendcounts))) else: - dd_tally = np.empty(tally.shape[0]) # dummy tally + dd_tally = np.empty(tally.shape[0]) # dummy tally # gather tallies for i, t in enumerate(tally): - MPI.COMM_WORLD.Gatherv(sendbuf=tally[i], recvbuf=(dd_tally[i], sendcounts), root=0) - #print(dd_tally) - + MPI.COMM_WORLD.Gatherv( + sendbuf=tally[i], recvbuf=(dd_tally[i], sendcounts), root=0 + ) + # print(dd_tally) + # MPI gather for multiprocessor subdomains else: i = 0 @@ -1315,11 +1317,10 @@ def dd_mergetally(mcdc, data): # create MPI comm group for nonzero tallies dd_group = MPI.COMM_WORLD.group.Incl(dd_ranks) dd_comm = MPI.COMM_WORLD.Create(dd_group) - dd_tally = np.empty(tally.shape[0]) # dummy tally + dd_tally = np.empty(tally.shape[0]) # dummy tally if MPI.COMM_NULL != dd_comm: - sendcounts = np.array(dd_comm.gather(len(tally[0]),root=0)) - #print(MPI.COMM_WORLD.Get_rank(), tally) + sendcounts = np.array(dd_comm.gather(len(tally[0]), root=0)) if mcdc["mpi_master"]: dd_tally = np.zeros((tally.shape[0], sum(sendcounts))) # gather tallies @@ -1328,8 +1329,7 @@ def dd_mergetally(mcdc, data): dd_group.Free() if MPI.COMM_NULL != dd_comm: dd_comm.Free() - #print(dd_tally) - + if mcdc["mpi_master"]: buff = np.zeros_like(dd_tally) # reorganize tally data diff --git a/test/regression/dd_slab_reed/input.py b/test/regression/dd_slab_reed/input.py index 19e0c405..e15582b2 100644 --- a/test/regression/dd_slab_reed/input.py +++ b/test/regression/dd_slab_reed/input.py @@ -46,7 +46,7 @@ # Setting mcdc.setting(N_particle=5000) -dd_z = np.array([0.0,2.0,3.0,5.0,8.0]) +dd_z = np.array([0.0, 2.0, 3.0, 5.0, 8.0]) mcdc.domain_decomposition(z=dd_z) # Run mcdc.run() From e498ccd91b7ee40db642e90bd014af617c716a2c Mon Sep 17 00:00:00 2001 From: alexandermote Date: Tue, 15 Oct 2024 15:30:32 -0700 Subject: [PATCH 5/6] back in black --- mcdc/kernel.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mcdc/kernel.py b/mcdc/kernel.py index 6bc2c38e..4812b577 100644 --- a/mcdc/kernel.py +++ b/mcdc/kernel.py @@ -2069,14 +2069,14 @@ def dd_closeout(data, mcdc): if MPI.COMM_NULL != dd_comm: dd_comm.Reduce(tally_bin[TALLY_SUM], buff, MPI.SUM, 0) dd_comm.Reduce(tally_bin[TALLY_SUM_SQ], buff_sq, MPI.SUM, 0) - if mcdc["dd_idx"] == n: - tally_bin[TALLY_SUM] = buff - tally_bin[TALLY_SUM_SQ] = buff_sq * len(dd_ranks) - - # free comm group - dd_group.Free() - if MPI.COMM_NULL != dd_comm: - dd_comm.Free() + if mcdc["dd_idx"] == n: + tally_bin[TALLY_SUM] = buff + tally_bin[TALLY_SUM_SQ] = buff_sq * len(dd_ranks) + + # free comm group + dd_group.Free() + if MPI.COMM_NULL != dd_comm: + dd_comm.Free() @njit From 671b276ba1a9876493310d6874b408a4753886cf Mon Sep 17 00:00:00 2001 From: alexandermote Date: Tue, 15 Oct 2024 21:30:19 -0700 Subject: [PATCH 6/6] added mesh recomposition in 
postprocessing --- mcdc/main.py | 115 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 3 deletions(-) diff --git a/mcdc/main.py b/mcdc/main.py index cba69da0..ffff4b02 100644 --- a/mcdc/main.py +++ b/mcdc/main.py @@ -1358,10 +1358,100 @@ def dd_mergetally(mcdc, data): return dd_tally +def dd_mergemesh(mcdc, data): + """ + Performs mesh recombination on domain-decomposed mesh tallies. + Gathers and re-organizes mesh data into a single array as it + would appear in a non-decomposed simulation. + """ + d_Nx = input_deck.technique["dd_mesh"]["x"].size - 1 + d_Ny = input_deck.technique["dd_mesh"]["y"].size - 1 + d_Nz = input_deck.technique["dd_mesh"]["z"].size - 1 + # gather mesh filter + if d_Nx > 1: + sendcounts = np.array( + MPI.COMM_WORLD.gather(len(mcdc["mesh_tallies"][0]["filter"]["x"]), root=0) + ) + if mcdc["mpi_master"]: + x_filter = np.zeros((mcdc["mesh_tallies"].shape, sum(sendcounts))) + else: + x_filter = np.empty((mcdc["mesh_tallies"].shape)) # dummy tally + # gather mesh + for i in range(mcdc["mesh_tallies"].shape): + MPI.COMM_WORLD.Gatherv( + sendbuf=mcdc["mesh_tallies"][i]["filter"]["x"], + recvbuf=(x_filter[i], sendcounts), + root=0, + ) + if mcdc["mpi_master"]: + x_final = np.zeros((mcdc["mesh_tallies"].shape[0], x_filter.shape[1] + 1)) + x_final[:, 0] = mcdc["mesh_tallies"][:]["filter"]["x"][0][0] + x_final[:, 1:] = x_filter + + if d_Ny > 1: + sendcounts = np.array( + MPI.COMM_WORLD.gather(len(mcdc["mesh_tallies"][0]["filter"]["y"]), root=0) + ) + if mcdc["mpi_master"]: + y_filter = np.zeros((mcdc["mesh_tallies"].shape[0], sum(sendcounts))) + else: + y_filter = np.empty((mcdc["mesh_tallies"].shape[0])) # dummy tally + # gather mesh + for i in range(mcdc["mesh_tallies"].shape): + MPI.COMM_WORLD.Gatherv( + sendbuf=mcdc["mesh_tallies"][i]["filter"]["y"], + recvbuf=(y_filter[i], sendcounts), + root=0, + ) + if mcdc["mpi_master"]: + y_final = np.zeros((mcdc["mesh_tallies"].shape[0], y_filter.shape[1] + 1)) + y_final[:, 0] = mcdc["mesh_tallies"][:]["filter"]["y"][0][0] + y_final[:, 1:] = y_filter + + if d_Nz > 1: + sendcounts = np.array( + MPI.COMM_WORLD.gather( + len(mcdc["mesh_tallies"][0]["filter"]["z"]) - 1, root=0 + ) + ) + if mcdc["mpi_master"]: + z_filter = np.zeros((mcdc["mesh_tallies"].shape[0], sum(sendcounts))) + else: + z_filter = np.empty((mcdc["mesh_tallies"].shape[0])) # dummy tally + # gather mesh + for i in range(mcdc["mesh_tallies"].shape[0]): + MPI.COMM_WORLD.Gatherv( + sendbuf=mcdc["mesh_tallies"][i]["filter"]["z"][1:], + recvbuf=(z_filter[i], sendcounts), + root=0, + ) + if mcdc["mpi_master"]: + z_final = np.zeros((mcdc["mesh_tallies"].shape[0], z_filter.shape[1] + 1)) + z_final[:, 0] = mcdc["mesh_tallies"][:]["filter"]["z"][0][0] + z_final[:, 1:] = z_filter + + dd_mesh = [] + if mcdc["mpi_master"]: + if d_Nx > 1: + dd_mesh.append(x_final) + else: + dd_mesh.append(mcdc["mesh_tallies"][:]["filter"]["x"]) + if d_Ny > 1: + dd_mesh.append(y_final) + else: + dd_mesh.append(mcdc["mesh_tallies"][:]["filter"]["y"]) + if d_Nz > 1: + dd_mesh.append(z_final) + else: + dd_mesh.append("z", mcdc["mesh_tallies"][:]["filter"]["z"]) + return dd_mesh + + def generate_hdf5(data, mcdc): if mcdc["technique"]["domain_decomposition"]: dd_tally = dd_mergetally(mcdc, data) + dd_mesh = dd_mergemesh(mcdc, data) if mcdc["mpi_master"]: if mcdc["setting"]["progress_bar"]: @@ -1399,15 +1489,34 @@ def generate_hdf5(data, mcdc): mesh = tally["filter"] f.create_dataset("tallies/mesh_tally_%i/grid/t" % ID, data=mesh["t"]) - 
f.create_dataset("tallies/mesh_tally_%i/grid/x" % ID, data=mesh["x"]) - f.create_dataset("tallies/mesh_tally_%i/grid/y" % ID, data=mesh["y"]) - f.create_dataset("tallies/mesh_tally_%i/grid/z" % ID, data=mesh["z"]) f.create_dataset("tallies/mesh_tally_%i/grid/mu" % ID, data=mesh["mu"]) f.create_dataset( "tallies/mesh_tally_%i/grid/azi" % ID, data=mesh["azi"] ) f.create_dataset("tallies/mesh_tally_%i/grid/g" % ID, data=mesh["g"]) + if not mcdc["technique"]["domain_decomposition"]: + f.create_dataset( + "tallies/mesh_tally_%i/grid/x" % ID, data=mesh["x"] + ) + f.create_dataset( + "tallies/mesh_tally_%i/grid/y" % ID, data=mesh["y"] + ) + f.create_dataset( + "tallies/mesh_tally_%i/grid/z" % ID, data=mesh["z"] + ) + else: + # substitute recomposed mesh + f.create_dataset( + "tallies/mesh_tally_%i/grid/x" % ID, data=dd_mesh[0][ID] + ) + f.create_dataset( + "tallies/mesh_tally_%i/grid/y" % ID, data=dd_mesh[1][ID] + ) + f.create_dataset( + "tallies/mesh_tally_%i/grid/z" % ID, data=dd_mesh[2][ID] + ) + # Shape Nmu = len(mesh["mu"]) - 1 N_azi = len(mesh["azi"]) - 1