Skip to content

Commit

Permalink
Enable coalesced access in preFDM and postFDM, and fix a bug in fusedFDM (#121)
Browse files: browse the repository at this point in the history
  • Loading branch information
pwang234 authored Jul 25, 2020
1 parent 1462cd1 commit 04f17fd
Showing 1 changed file with 13 additions and 17 deletions.
30 changes: 13 additions & 17 deletions src/libP/solvers/elliptic/okl/ellipticSchwarzSolverHex3D.okl
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,10 @@
}
}
@barrier("local");
for(int k = 0; k < p_Nq_e; ++k; @inner) {
for(int k = 0; k < p_Nq; ++k) {
for(int j = 0; j < p_Nq_e; ++j; @inner) {
if(k < p_Nq && j < p_Nq) {
#pragma unroll
for(int i = 0; i < p_Nq; ++i) {
for(int i = 0; i < p_Nq_e; ++i; @inner) {
if(i < p_Nq && j < p_Nq) {
const dlong elem_offset = elem * p_Nq * p_Nq * p_Nq;
const dlong idx = i + j * p_Nq + k * p_Nq * p_Nq + elem_offset;
sWork1[k + 1][j + 1][i + 1] = static_cast < pfloat > (u[idx]);
Expand Down Expand Up @@ -66,10 +65,9 @@
sWork1[i][j][p_Nq_e - l1 - 1] = sWork1[i][j][p_Nq_e - l2 - 1];
}
@barrier("local");
for(int k = 0; k < p_Nq_e; ++k; @inner) {
for(int k = 0; k < p_Nq_e; ++k) {
for(int j = 0; j < p_Nq_e; ++j; @inner) {
#pragma unroll
for(int i = 0; i < p_Nq_e; ++i) {
for(int i = 0; i < p_Nq_e; ++i; @inner) {
const dlong elem_offset = p_Nq_e * p_Nq_e * p_Nq_e * elem;
const dlong idx = i + j * p_Nq_e + k * p_Nq_e * p_Nq_e + elem_offset;
work1[idx] = sWork1[k][j][i];
Expand All @@ -86,10 +84,9 @@
for (dlong elem = 0; elem < Nelements; ++elem; @outer) {
@shared pfloat work1[p_Nq_e][p_Nq_e][p_Nq_e];
@shared pfloat work2[p_Nq_e][p_Nq_e][p_Nq_e];
for(int k = 0; k < p_Nq_e; ++k; @inner) {
for(int k = 0; k < p_Nq_e; ++k) {
for(int j = 0; j < p_Nq_e; ++j; @inner) {
#pragma unroll
for(int i = 0; i < p_Nq_e; ++i) {
for(int i = 0; i < p_Nq_e; ++i; @inner) {
const dlong elem_offset = elem * p_Nq_e * p_Nq_e * p_Nq_e;
const dlong idx = i + j * p_Nq_e + k * p_Nq_e * p_Nq_e + elem_offset;
work1[k][j][i] = my_work2[idx];
Expand Down Expand Up @@ -158,11 +155,10 @@
work1[i][j][p_Nq_e - l2 - 1];
}
@barrier("local");
for(int k = 0; k < p_Nq_e; ++k; @inner) {
for(int k = 0; k < p_Nq; ++k) {
for(int j = 0; j < p_Nq_e; ++j; @inner) {
if(k < p_Nq && j < p_Nq) {
#pragma unroll
for(int i = 0; i < p_Nq; ++i) {
for(int i = 0; i < p_Nq_e; ++i; @inner) {
if(i < p_Nq && j < p_Nq) {
const dlong elem_offset = elem * p_Nq * p_Nq * p_Nq;
const dlong idx = i + j * p_Nq + k * p_Nq * p_Nq + elem_offset;
Su[idx] = work1[k + 1][j + 1][i + 1];
Expand Down Expand Up @@ -363,10 +359,10 @@
}
#else /* if (!p_restrict) */
@barrier("local");
for(int k = 0; k < p_Nq_e; ++k) {
for(int k = 0; k < p_Nq; ++k) {
for(int j = 0; j < p_Nq_e; ++j; @inner) {
for(int i = 0; i < p_Nq; ++i; @inner) {
if(k < p_Nq && j < p_Nq) {
for(int i = 0; i < p_Nq_e; ++i; @inner) {
if(i < p_Nq && j < p_Nq) {
const dlong elem_offset = e * p_Nq * p_Nq * p_Nq;
const dlong idx = i + j * p_Nq + k * p_Nq * p_Nq + elem_offset;
Su[idx] = work1[k + 1][j + 1][i + 1];
Expand Down

0 comments on commit 04f17fd

Please sign in to comment.