Skip to content

Commit

Permalink
Precompile AVM kernels (#330)
Browse files (browse the repository at this point in the history)
  • Loading branch information
MalachiTimothyPhillips authored Nov 3, 2021
1 parent 93db341 commit b22cbbb
Show file tree
Hide file tree
Showing 17 changed files with 147 additions and 146 deletions.
2 changes: 1 addition & 1 deletion okl/cds/regularization/relativeMassHighestMode.okl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
for(int j = 0; j < p_Nq; ++j; @inner(1)) {
for(int i = 0; i < p_Nq; ++i; @inner(0)) {
const int id = i + j * p_Nq;
s_FT[0][id] = fMT[id + is * p_Nq * p_Nq];
s_FT[j][i] = fMT[id + is * p_Nq * p_Nq];

#pragma unroll p_Nq
for(int k = 0; k < p_Nq; ++k)
Expand Down
12 changes: 12 additions & 0 deletions src/cds/registerCdsKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,18 @@ void registerCdsKernels(occa::properties kernelInfoBC) {
occa::properties meshProps = kernelInfo;
meshProps += meshKernelProperties(N);
{
kernelName = "relativeMassHighestMode";
fileName = oklpath + "cds/regularization/" + kernelName + ".okl";
platform->kernels.add(kernelName, fileName, meshProps);

kernelName = "computeMaxVisc";
fileName = oklpath + "cds/regularization/" + kernelName + ".okl";
platform->kernels.add(kernelName, fileName, meshProps);

kernelName = "interpolateP1";
fileName = oklpath + "cds/regularization/" + kernelName + ".okl";
platform->kernels.add(kernelName, fileName, meshProps);

{
occa::properties prop = meshProps;
prop["defines/p_cubNq"] = cubNq;
Expand Down
2 changes: 1 addition & 1 deletion src/core/kernelRequestManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ kernelRequestManager_t::add(kernelRequest_t request, bool checkUnique)
fileNameToRequestMap[fileName].insert(request);
}
occa::kernel
kernelRequestManager_t::getKernel(const std::string& request, bool checkValid) const
kernelRequestManager_t::get(const std::string& request, bool checkValid) const
{
if(checkValid){
bool issueError = 0;
Expand Down
2 changes: 1 addition & 1 deletion src/core/kernelRequestManager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class kernelRequestManager_t
void compile();

occa::kernel
getKernel(const std::string& request, bool checkValid = true) const;
get(const std::string& request, bool checkValid = true) const;

bool
processed() const { return kernelsProcessed; }
Expand Down
98 changes: 49 additions & 49 deletions src/core/setup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -470,137 +470,137 @@ void nrsSetup(MPI_Comm comm, setupAide &options, nrs_t *nrs)
const std::string section = "nrs-";
kernelName = "nStagesSum3";
nrs->nStagesSum3Kernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "computeFieldDotNormal";
nrs->computeFieldDotNormalKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "computeFaceCentroid";
nrs->computeFaceCentroidKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

{
kernelName = "strongAdvectionVolume" + suffix;
nrs->advectionStrongVolumeKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);
kernelName = "strongAdvectionCubatureVolume" + suffix;
nrs->advectionStrongCubatureVolumeKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);
}

kernelName = "curl" + suffix;
nrs->curlKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "gradientVolume" + suffix;
nrs->gradientVolumeKernel = platform->kernels.getKernel( section + kernelName);
nrs->gradientVolumeKernel = platform->kernels.get( section + kernelName);

kernelName = "nrswGradientVolume" + suffix;
nrs->wgradientVolumeKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

{
kernelName = "sumMakef";
nrs->sumMakefKernel = platform->kernels.getKernel( section + kernelName);
nrs->sumMakefKernel = platform->kernels.get( section + kernelName);
}

kernelName = "nrswDivergenceVolume" + suffix;
nrs->wDivergenceVolumeKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);
kernelName = "divergenceVolume" + suffix;
nrs->divergenceVolumeKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "divergenceSurfaceTOMBO" + suffix;
nrs->divergenceSurfaceKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "advectMeshVelocityHex3D";
nrs->advectMeshVelocityKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "pressureRhsTOMBO" + suffix;
nrs->pressureRhsKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "pressureStress" + suffix;
nrs->pressureStressKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "pressureDirichletBC" + suffix;
nrs->pressureDirichletBCKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "velocityRhsTOMBO" + suffix;
nrs->velocityRhsKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "velocityDirichletBC" + suffix;
nrs->velocityDirichletBCKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "velocityNeumannBC" + suffix;
nrs->velocityNeumannBCKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "UrstCubature" + suffix;
nrs->UrstCubatureKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "Urst" + suffix;
nrs->UrstKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);


if(nrs->Nsubsteps){
kernelName = "subCycleStrongCubatureVolume" + suffix;
nrs->subCycleStrongCubatureVolumeKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);
kernelName = "subCycleStrongVolume" + suffix;
nrs->subCycleStrongVolumeKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "subCycleERKUpdate";
nrs->subCycleRKUpdateKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);
kernelName = "subCycleRK";
nrs->subCycleRKKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "subCycleInitU0";
nrs->subCycleInitU0Kernel = platform->kernels.getKernel( section + kernelName);
nrs->subCycleInitU0Kernel = platform->kernels.get( section + kernelName);
}

kernelName = "multiExtrapolate";
nrs->extrapolateKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "maskCopy";
nrs->maskCopyKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);
kernelName = "mask";
nrs->maskKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "filterRT" + suffix;
nrs->filterRTKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "cfl" + suffix;
nrs->cflKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "pressureAddQtl";
nrs->pressureAddQtlKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "setEllipticCoeff";
nrs->setEllipticCoeffKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);
kernelName = "setEllipticCoeffPressure";
nrs->setEllipticCoeffPressureKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);
}

MPI_Barrier(platform->comm.mpiComm);
Expand Down
Expand Up @@ -1252,58 +1252,58 @@ cds_t* cdsSetup(nrs_t* nrs, setupAide options)
{
kernelName = "strongAdvectionVolume" + suffix;
cds->advectionStrongVolumeKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "strongAdvectionCubatureVolume" + suffix;
cds->advectionStrongCubatureVolumeKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "advectMeshVelocityHex3D";
cds->advectMeshVelocityKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "maskCopy";
cds->maskCopyKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

{
kernelName = "sumMakef";
cds->sumMakefKernel = platform->kernels.getKernel( section + kernelName);
cds->sumMakefKernel = platform->kernels.get( section + kernelName);
}

kernelName = "helmholtzBC" + suffix;
cds->helmholtzRhsBCKernel = platform->kernels.getKernel( section + kernelName);
cds->helmholtzRhsBCKernel = platform->kernels.get( section + kernelName);
kernelName = "dirichletBC";
cds->dirichletBCKernel = platform->kernels.getKernel( section + kernelName);
cds->dirichletBCKernel = platform->kernels.get( section + kernelName);

kernelName = "setEllipticCoeff";
cds->setEllipticCoeffKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "filterRT" + suffix;
cds->filterRTKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

kernelName = "nStagesSum3";
cds->nStagesSum3Kernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);

if(cds->Nsubsteps) {
kernelName = "subCycleStrongCubatureVolume" + suffix;
cds->subCycleStrongCubatureVolumeKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);
kernelName = "subCycleStrongVolume" + suffix;
cds->subCycleStrongVolumeKernel =
platform->kernels.getKernel( section + kernelName);
platform->kernels.get( section + kernelName);


kernelName = "subCycleERKUpdate";
cds->subCycleRKUpdateKernel = platform->kernels.getKernel( section + kernelName);
cds->subCycleRKUpdateKernel = platform->kernels.get( section + kernelName);
kernelName = "subCycleRK";
cds->subCycleRKKernel = platform->kernels.getKernel( section + kernelName);
cds->subCycleRKKernel = platform->kernels.get( section + kernelName);

kernelName = "subCycleInitU0";
cds->subCycleInitU0Kernel = platform->kernels.getKernel( section + kernelName);
cds->subCycleInitU0Kernel = platform->kernels.get( section + kernelName);
}
}

Expand Down
6 changes: 3 additions & 3 deletions src/elliptic/amgSolver/parAlmond/coarseSolver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,13 @@ void coarseSolver::setup(

{
std::string kernelName = "convertFP64ToFP32";
convertFP64ToFP32Kernel = platform->kernels.getKernel(kernelName);
convertFP64ToFP32Kernel = platform->kernels.get(kernelName);

kernelName = "convertFP32ToFP64";
convertFP32ToFP64Kernel = platform->kernels.getKernel(kernelName);
convertFP32ToFP64Kernel = platform->kernels.get(kernelName);

kernelName = "vectorDotStar2";
vectorDotStarKernel2 = platform->kernels.getKernel(kernelName);
vectorDotStarKernel2 = platform->kernels.get(kernelName);
}


Expand Down
8 changes: 4 additions & 4 deletions src/elliptic/ellipticBuildMultigridLevel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,12 @@ elliptic_t* ellipticBuildMultigridLevel(elliptic_t* baseElliptic, int Nc, int Nf

{
const std::string kernelSuffix = gen_suffix(elliptic, dfloatString);
elliptic->AxKernel = platform->kernels.getKernel(poissonPrefix + kernelName + kernelSuffix);
elliptic->AxKernel = platform->kernels.get(poissonPrefix + kernelName + kernelSuffix);
}
{
const std::string kernelSuffix = gen_suffix(elliptic, pfloatString);
elliptic->AxPfloatKernel =
platform->kernels.getKernel(poissonPrefix + kernelName + kernelSuffix);
platform->kernels.get(poissonPrefix + kernelName + kernelSuffix);
}
}

Expand All @@ -103,9 +103,9 @@ elliptic_t* ellipticBuildMultigridLevel(elliptic_t* baseElliptic, int Nc, int Nf
const std::string kernelSuffix = std::string("_") + std::to_string(Nf);

kernelName = "ellipticPreconCoarsen" + suffix;
elliptic->precon->coarsenKernel = platform->kernels.getKernel(kernelName + kernelSuffix);
elliptic->precon->coarsenKernel = platform->kernels.get(kernelName + kernelSuffix);
kernelName = "ellipticPreconProlongate" + suffix;
elliptic->precon->prolongateKernel = platform->kernels.getKernel(kernelName + kernelSuffix);
elliptic->precon->prolongateKernel = platform->kernels.get(kernelName + kernelSuffix);

}

Expand Down
4 changes: 2 additions & 2 deletions src/elliptic/ellipticBuildMultigridLevelFine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,12 @@ elliptic_t* ellipticBuildMultigridLevelFine(elliptic_t* baseElliptic)

{
const std::string kernelSuffix = gen_suffix(elliptic, dfloatString);
elliptic->AxKernel = platform->kernels.getKernel(poissonPrefix + kernelName + kernelSuffix);
elliptic->AxKernel = platform->kernels.get(poissonPrefix + kernelName + kernelSuffix);
}
{
const std::string kernelSuffix = gen_suffix(elliptic, pfloatString);
elliptic->AxPfloatKernel =
platform->kernels.getKernel(poissonPrefix + kernelName + kernelSuffix);
platform->kernels.get(poissonPrefix + kernelName + kernelSuffix);
}
}

Expand Down
Loading

0 comments on commit b22cbbb

Please sign in to comment.