Skip to content

Commit

Permalink
basis - add MAGMA ApplyAdd
Browse files Browse the repository at this point in the history
  • Loading branch information
jeremylt committed Aug 9, 2024
1 parent 5e414ad commit 18d93ab
Show file tree
Hide file tree
Showing 11 changed files with 505 additions and 10 deletions.
59 changes: 49 additions & 10 deletions backends/magma/ceed-magma-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
//------------------------------------------------------------------------------
// Basis apply - tensor
//------------------------------------------------------------------------------
static int CeedBasisApply_Magma(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode, CeedVector u, CeedVector v) {
static int CeedBasisApplyCore_Magma(CeedBasis basis, bool apply_add, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode, CeedVector u,
CeedVector v) {
Ceed ceed;
Ceed_Magma *data;
CeedInt dim, num_comp, num_nodes, P_1d, Q_1d, P, Q;
Expand All @@ -52,7 +53,8 @@ static int CeedBasisApply_Magma(CeedBasis basis, CeedInt num_elem, CeedTranspose
// Read vectors
if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
else CeedCheck(e_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));

// Apply basis operation
switch (e_mode) {
Expand Down Expand Up @@ -115,7 +117,8 @@ static int CeedBasisApply_Magma(CeedBasis basis, CeedInt num_elem, CeedTranspose
void *args[] = {&impl->d_interp_1d, &d_u, &u_elem_stride, &u_comp_stride, &d_v, &v_elem_stride, &v_comp_stride, &num_elem};

if (t_mode == CEED_TRANSPOSE) {
CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->InterpTranspose, grid, num_threads, num_t_col, 1, shared_mem, args));
CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, apply_add ? impl->InterpTransposeAdd : impl->InterpTranspose, grid, num_threads, num_t_col,
1, shared_mem, args));
} else {
CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Interp, grid, num_threads, num_t_col, 1, shared_mem, args));
}
Expand Down Expand Up @@ -192,7 +195,8 @@ static int CeedBasisApply_Magma(CeedBasis basis, CeedInt num_elem, CeedTranspose
&v_elem_stride, &v_comp_stride, &v_dim_stride, &num_elem};

if (t_mode == CEED_TRANSPOSE) {
CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->GradTranspose, grid, num_threads, num_t_col, 1, shared_mem, args));
CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, apply_add ? impl->GradTransposeAdd : impl->GradTranspose, grid, num_threads, num_t_col, 1,
shared_mem, args));
} else {
CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Grad, grid, num_threads, num_t_col, 1, shared_mem, args));
}
Expand Down Expand Up @@ -248,6 +252,16 @@ static int CeedBasisApply_Magma(CeedBasis basis, CeedInt num_elem, CeedTranspose
return CEED_ERROR_SUCCESS;
}

static int CeedBasisApply_Magma(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode, CeedVector u, CeedVector v) {
CeedCallBackend(CeedBasisApplyCore_Magma(basis, false, num_elem, t_mode, e_mode, u, v));
return CEED_ERROR_SUCCESS;
}

static int CeedBasisApplyAdd_Magma(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode, CeedVector u, CeedVector v) {
CeedCallBackend(CeedBasisApplyCore_Magma(basis, true, num_elem, t_mode, e_mode, u, v));
return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Basis apply - tensor AtPoints
//------------------------------------------------------------------------------
Expand All @@ -259,8 +273,8 @@ int CeedBasisApplyAtPoints_Magma(CeedBasis basis, const CeedInt num_elem, const
//------------------------------------------------------------------------------
// Basis apply - non-tensor
//------------------------------------------------------------------------------
static int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode, CeedVector u,
CeedVector v) {
static int CeedBasisApplyNonTensorCore_Magma(CeedBasis basis, bool apply_add, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode,
CeedVector u, CeedVector v) {
Ceed ceed;
Ceed_Magma *data;
CeedInt num_comp, num_nodes, num_qpts, P, Q, N;
Expand All @@ -281,7 +295,8 @@ static int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt num_elem, Ceed
// Read vectors
if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
else CeedCheck(e_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));

// Compile kernels for N as needed
CeedInt iN = 0;
Expand Down Expand Up @@ -344,8 +359,10 @@ static int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt num_elem, Ceed
impl->NB_deriv_t[iN]));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_interp_nontensor_n", &impl->Interp[iN]));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_interp_nontensor_t", &impl->InterpTranspose[iN]));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_interp_nontensor_ta", &impl->InterpTransposeAdd[iN]));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_deriv_nontensor_n", &impl->Deriv[iN]));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_deriv_nontensor_t", &impl->DerivTranspose[iN]));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_deriv_nontensor_ta", &impl->DerivTransposeAdd[iN]));
if (!impl->Weight) {
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_weight_nontensor", &impl->Weight));
CeedCallBackend(CeedFree(&weight_kernel_path));
Expand Down Expand Up @@ -388,15 +405,15 @@ static int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt num_elem, Ceed
if (P <= MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P && Q <= MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q) {
if (e_mode == CEED_EVAL_INTERP) {
if (t_mode == CEED_TRANSPOSE) {
Kernel = impl->InterpTranspose[iN];
Kernel = apply_add ? impl->InterpTransposeAdd[iN] : impl->InterpTranspose[iN];
NB = impl->NB_interp_t[iN];
} else {
Kernel = impl->Interp[iN];
NB = impl->NB_interp[iN];
}
} else {
if (t_mode == CEED_TRANSPOSE) {
Kernel = impl->DerivTranspose[iN];
Kernel = apply_add ? impl->DerivTransposeAdd[iN] : impl->DerivTranspose[iN];
NB = impl->NB_deriv_t[iN];
} else {
Kernel = impl->Deriv[iN];
Expand All @@ -414,7 +431,7 @@ static int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt num_elem, Ceed
} else {
for (CeedInt d = 0; d < q_comp; d++) {
if (t_mode == CEED_TRANSPOSE) {
const CeedScalar beta = (d > 0) ? 1.0 : 0.0;
const CeedScalar beta = (apply_add || (d > 0)) ? 1.0 : 0.0;
magma_gemm_nontensor(MagmaNoTrans, MagmaNoTrans, P, N, Q, 1.0, d_b + d * P * Q, P, d_u + d * N * Q, Q, beta, d_v, P, data->queue);
} else {
magma_gemm_nontensor(MagmaTrans, MagmaNoTrans, Q, N, P, 1.0, d_b + d * P * Q, P, d_u, P, 0.0, d_v + d * N * Q, Q, data->queue);
Expand Down Expand Up @@ -443,6 +460,18 @@ static int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt num_elem, Ceed
return CEED_ERROR_SUCCESS;
}

static int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode, CeedVector u,
CeedVector v) {
CeedCallBackend(CeedBasisApplyNonTensorCore_Magma(basis, false, num_elem, t_mode, e_mode, u, v));
return CEED_ERROR_SUCCESS;
}

static int CeedBasisApplyAddNonTensor_Magma(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode, CeedVector u,
CeedVector v) {
CeedCallBackend(CeedBasisApplyNonTensorCore_Magma(basis, true, num_elem, t_mode, e_mode, u, v));
return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Destroy tensor basis
//------------------------------------------------------------------------------
Expand Down Expand Up @@ -559,22 +588,28 @@ int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const
case 1:
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpn_1d_kernel", &impl->Interp));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpt_1d_kernel", &impl->InterpTranspose));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpta_1d_kernel", &impl->InterpTransposeAdd));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradn_1d_kernel", &impl->Grad));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradt_1d_kernel", &impl->GradTranspose));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradta_1d_kernel", &impl->GradTransposeAdd));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_weight_1d_kernel", &impl->Weight));
break;
case 2:
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpn_2d_kernel", &impl->Interp));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpt_2d_kernel", &impl->InterpTranspose));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpta_2d_kernel", &impl->InterpTransposeAdd));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradn_2d_kernel", &impl->Grad));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradt_2d_kernel", &impl->GradTranspose));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradta_2d_kernel", &impl->GradTransposeAdd));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_weight_2d_kernel", &impl->Weight));
break;
case 3:
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpn_3d_kernel", &impl->Interp));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpt_3d_kernel", &impl->InterpTranspose));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpta_3d_kernel", &impl->InterpTransposeAdd));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradn_3d_kernel", &impl->Grad));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradt_3d_kernel", &impl->GradTranspose));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradta_3d_kernel", &impl->GradTransposeAdd));
CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_weight_3d_kernel", &impl->Weight));
break;
}
Expand All @@ -588,6 +623,7 @@ int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const
CeedCallBackend(CeedBasisSetData(basis, impl));

CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Magma));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAdd_Magma));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAtPoints", CeedBasisApplyAtPoints_Magma));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroy_Magma));
return CEED_ERROR_SUCCESS;
Expand Down Expand Up @@ -650,6 +686,7 @@ int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_node

// Register backend functions
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Magma));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Magma));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Magma));
return CEED_ERROR_SUCCESS;
}
Expand Down Expand Up @@ -711,6 +748,7 @@ int CeedBasisCreateHdiv_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_no

// Register backend functions
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Magma));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Magma));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Magma));
return CEED_ERROR_SUCCESS;
}
Expand Down Expand Up @@ -772,6 +810,7 @@ int CeedBasisCreateHcurl_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_n

// Register backend functions
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Magma));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Magma));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Magma));
return CEED_ERROR_SUCCESS;
}
Expand Down
4 changes: 4 additions & 0 deletions backends/magma/ceed-magma.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,10 @@ typedef struct {
CeedMagmaModule module;
CeedMagmaFunction Interp;
CeedMagmaFunction InterpTranspose;
CeedMagmaFunction InterpTransposeAdd;
CeedMagmaFunction Grad;
CeedMagmaFunction GradTranspose;
CeedMagmaFunction GradTransposeAdd;
CeedMagmaFunction Weight;
CeedScalar *d_interp_1d;
CeedScalar *d_grad_1d;
Expand All @@ -59,8 +61,10 @@ typedef struct {
CeedMagmaModule module[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedMagmaFunction Interp[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedMagmaFunction InterpTranspose[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedMagmaFunction InterpTransposeAdd[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedMagmaFunction Deriv[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedMagmaFunction DerivTranspose[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedMagmaFunction DerivTransposeAdd[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedMagmaFunction Weight;
CeedInt NB_interp[MAGMA_NONTENSOR_KERNEL_INSTANCES], NB_interp_t[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedInt NB_deriv[MAGMA_NONTENSOR_KERNEL_INSTANCES], NB_deriv_t[MAGMA_NONTENSOR_KERNEL_INSTANCES];
Expand Down
45 changes: 45 additions & 0 deletions include/ceed/jit-source/magma/magma-basis-grad-1d.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,48 @@ extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(BASIS_MAX_P_Q, MAGMA_MAXTHREADS_
// write V
write_1d<CeedScalar, BASIS_P, BASIS_NUM_COMP>(sV, dV, cstrdV, tx);
}

////////////////////////////////////////////////////////////////////////////////
extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(BASIS_MAX_P_Q, MAGMA_MAXTHREADS_1D)) __global__
void magma_gradta_1d_kernel(const CeedScalar *dTinterp, const CeedScalar *dTgrad, const CeedScalar *dU, const int estrdU, const int cstrdU,
const int dstrdU, CeedScalar *dV, const int estrdV, const int cstrdV, const int dstrdV, const int nelem) {
MAGMA_DEVICE_SHARED(CeedScalar, shared_data)

const int tx = threadIdx.x;
const int ty = threadIdx.y;
const int elem_id = (blockIdx.x * blockDim.y) + ty;

if (elem_id >= nelem) return;

CeedScalar *sU[BASIS_NUM_COMP];
CeedScalar *sV[BASIS_NUM_COMP];

// shift global memory pointers by elem stride
dU += elem_id * estrdU;
dV += elem_id * estrdV;

// assign shared memory pointers
CeedScalar *sT = (CeedScalar *)shared_data;
CeedScalar *sW = sT + BASIS_Q * BASIS_P;
sU[0] = sW + ty * BASIS_NUM_COMP * (BASIS_Q + BASIS_P);
sV[0] = sU[0] + (BASIS_NUM_COMP * 1 * BASIS_Q);
for (int comp = 1; comp < BASIS_NUM_COMP; comp++) {
sU[comp] = sU[comp - 1] + (1 * BASIS_Q);
sV[comp] = sV[comp - 1] + (1 * BASIS_P);
}

// read T
if (ty == 0) {
read_T_trans_gm2sm<BASIS_Q, BASIS_P>(tx, dTgrad, sT);
}

// read U
read_1d<CeedScalar, BASIS_Q, BASIS_NUM_COMP>(dU, cstrdU, sU, tx);

__syncthreads();
magma_grad_1d_device<CeedScalar, BASIS_DIM, BASIS_NUM_COMP, BASIS_Q, BASIS_P>(sT, sU, sV, tx);
__syncthreads();

// sum into V
sum_1d<CeedScalar, BASIS_P, BASIS_NUM_COMP>(sV, dV, cstrdV, tx);
}
51 changes: 51 additions & 0 deletions include/ceed/jit-source/magma/magma-basis-grad-2d.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,3 +188,54 @@ extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(BASIS_MAX_P_Q, MAGMA_MAXTHREADS_
// write V
write_V_2d<CeedScalar, BASIS_P, 1, BASIS_NUM_COMP, BASIS_P, 0>(dV + (0 * dstrdV), cstrdV, rV, tx);
}

////////////////////////////////////////////////////////////////////////////////
extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(BASIS_MAX_P_Q, MAGMA_MAXTHREADS_2D)) __global__
void magma_gradta_2d_kernel(const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, const CeedScalar *dU, const int estrdU, const int cstrdU,
const int dstrdU, CeedScalar *dV, const int estrdV, const int cstrdV, const int dstrdV, const int nelem) {
MAGMA_DEVICE_SHARED(CeedScalar, shared_data)

const int tx = threadIdx.x;
const int ty = threadIdx.y;
const int elem_id = (blockIdx.x * blockDim.y) + ty;

if (elem_id >= nelem) return;

CeedScalar rU[1][BASIS_NUM_COMP][BASIS_Q] = {0.0}; // here DIM_U = 1, but might be different for a fused operator
CeedScalar rV[1][BASIS_NUM_COMP][BASIS_P] = {0.0}; // here DIM_V = 1, but might be different for a fused operator
CeedScalar rTmp = 0.0;

// shift global memory pointers by elem stride
dU += elem_id * estrdU;
dV += elem_id * estrdV;

// assign shared memory pointers
CeedScalar *sTinterp = (CeedScalar *)shared_data;
CeedScalar *sTgrad = sTinterp + BASIS_Q * BASIS_P;
CeedScalar *sTmp = sTgrad + BASIS_Q * BASIS_P;
sTmp += ty * (BASIS_Q * BASIS_MAX_P_Q);

// read T
if (ty == 0) {
read_T_trans_gm2sm<BASIS_Q, BASIS_P>(tx, dinterp1d, sTinterp);
read_T_trans_gm2sm<BASIS_Q, BASIS_P>(tx, dgrad1d, sTgrad);
}
__syncthreads();

/* read U (idim = 0 for dU, i_DIM = 0 for rU) --
there is a sync at the end of this function */
read_U_2d<CeedScalar, BASIS_Q, 1, BASIS_NUM_COMP, BASIS_Q, 0>(dU + (0 * dstrdU), cstrdU, rU, sTmp, tx);
/* first call (i_DIM = 0, i_DIM_U = 0, i_DIM_V = 0) */
magma_grad_2d_device<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_Q, BASIS_P, BASIS_Q, BASIS_P, 0, 0, 0, true>(sTinterp, sTgrad, rU, rV, tx, rTmp, sTmp);
/* there is a sync at the end of magma_grad_2d_device */

/* read U (idim = 1 for dU, i_DIM = 0 for rU) --
there is a sync at the end of this function */
read_U_2d<CeedScalar, BASIS_Q, 1, BASIS_NUM_COMP, BASIS_Q, 0>(dU + (1 * dstrdU), cstrdU, rU, sTmp, tx);
/* second call (i_DIM = 1, i_DIM_U = 0, i_DIM_V = 0) */
magma_grad_2d_device<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_Q, BASIS_P, BASIS_Q, BASIS_P, 1, 0, 0, true>(sTinterp, sTgrad, rU, rV, tx, rTmp, sTmp);
/* there is a sync at the end of magma_grad_2d_device */

// sum into V
sum_V_2d<CeedScalar, BASIS_P, 1, BASIS_NUM_COMP, BASIS_P, 0>(dV + (0 * dstrdV), cstrdV, rV, tx);
}
Loading

0 comments on commit 18d93ab

Please sign in to comment.