Skip to content

Commit

Permalink
Resolve outstanding issues with buildKernel (#336)
Browse files Browse the repository at this point in the history
* Compile platform kernels through `kernelRequestManager_t`.

* Call `UDF_LoadKernels` collectively
  - Use collective `buildKernel` in `udfLoadKernels`
  - Use collective `buildKernel` in plugins
  - All public-facing `device_t::buildKernel` overloads are collective,
    or take a boolean flag that selects whether to use the collective version
  • Loading branch information
MalachiTimothyPhillips authored Nov 3, 2021
1 parent b22cbbb commit 8ca79db
Show file tree
Hide file tree
Showing 11 changed files with 82 additions and 50 deletions.
4 changes: 4 additions & 0 deletions examples/ethier/ethier.udf
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ void userq(nrs_t *nrs, dfloat time, occa::memory o_S, occa::memory o_FS)

void UDF_LoadKernels(occa::properties& kernelInfo)
{
// called from all ranks, so MPI collectives are O.K.
int maxRank = platform->comm.mpiRank;
MPI_Allreduce(MPI_IN_PLACE, &maxRank, 1, MPI_INT, MPI_MAX, platform->comm.mpiComm);

setupAide &options = platform->options;

dfloat mue, rho;
Expand Down
8 changes: 8 additions & 0 deletions src/core/compileKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ void compileKernels() {

platform->kernels.compile();

// load platform related kernels
std::string kernelName;
kernelName = "copyDfloatToPfloat";
platform->copyDfloatToPfloatKernel = platform->kernels.get(kernelName);

kernelName = "copyPfloatToDfloat";
platform->copyPfloatToDfloatKernel = platform->kernels.get(kernelName);

MPI_Barrier(platform->comm.mpiComm);
const double loadTime = MPI_Wtime() - tStart;

Expand Down
23 changes: 22 additions & 1 deletion src/core/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ occa::kernel
device_t::buildKernel(const std::string &fileName,
const std::string &kernelName,
const occa::properties &props,
std::string suffix) const
const std::string& suffix) const
{

if(fileName.find(".okl") != std::string::npos){
Expand All @@ -80,6 +80,27 @@ device_t::buildKernel(const std::string &fileName,
}
}

// Collective kernel build: must be called by every rank of the staging
// communicator, since each rank reaches two MPI_Barrier calls below.
//
// fileName   - source file handed to the four-argument buildKernel overload
// kernelName - name of the kernel to build from that file
// props      - OCCA properties forwarded unchanged to the build
//
// Returns the kernel object built on the calling rank.
occa::kernel
device_t::buildKernel(const std::string &fileName,
                      const std::string &kernelName,
                      const occa::properties &props) const
{
  // Stage either per node or across the whole job, depending on the cache mode.
  const bool nodeLocal = useNodeLocalCache();
  MPI_Comm stagingComm = nodeLocal ? _comm.mpiCommLocal : _comm.mpiComm;
  const int stagingRank = nodeLocal ? _comm.localRank : _comm.mpiRank;
  const bool isLeader = (stagingRank == 0);

  const std::string emptySuffix = "";
  occa::kernel kernel;

  // Phase 1: only rank 0 of the staging communicator builds
  // (presumably so the remaining ranks can reuse its cached result -- TODO confirm).
  if (isLeader) {
    kernel = this->buildKernel(fileName, kernelName, props, emptySuffix);
  }
  MPI_Barrier(stagingComm);

  // Phase 2: all other ranks build once the leader has finished.
  if (!isLeader) {
    kernel = this->buildKernel(fileName, kernelName, props, emptySuffix);
  }
  MPI_Barrier(stagingComm);

  return kernel;
}

occa::kernel
device_t::buildKernel(const std::string &fullPath,
const occa::properties &props,
Expand Down
22 changes: 15 additions & 7 deletions src/core/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,26 +24,34 @@ class device_t {
occa::device& occaDevice() { return _device; }
void finish() { _device.finish(); }

occa::kernel buildKernel(const std::string &fullPath,
const occa::properties &props) const;
occa::kernel buildKernel(const std::string &fullPath,
const occa::properties &props,
const std::string& suffix) const;
occa::kernel buildKernel(const std::string &fullPath,
const occa::properties &props,
const std::string& suffix,
bool buildRank0) const;
occa::kernel buildKernel(const std::string &fullPath,
const occa::properties &props,
bool buildRank0) const;

// collective: every rank of the build communicator must call this overload.
// The implementation builds the kernel on rank 0 first, synchronizes with
// MPI_Barrier, then builds on the remaining ranks and synchronizes again.
// The node-local communicator/rank is used when useNodeLocalCache() is true,
// otherwise the global communicator/rank. An empty suffix is passed to the
// underlying non-collective build.
occa::kernel buildKernel(const std::string &fileName,
const std::string &kernelName,
const occa::properties &props) const;

bool deviceAtomic;

private:
friend occa::kernel udfBuildKernel(occa::properties, const char*);

// non-collective
occa::kernel buildKernel(const std::string &fullPath,
const occa::properties &props) const;
occa::kernel buildKernel(const std::string &fullPath,
const occa::properties &props,
const std::string& suffix) const;
occa::kernel buildKernel(const std::string &fileName,
const std::string &kernelName,
const occa::properties &props,
std::string suffix = "") const;
const std::string& suffix) const;

occa::kernel buildNativeKernel(const std::string &fileName,
const std::string &kernelName,
const occa::properties &props) const;
Expand Down
8 changes: 6 additions & 2 deletions src/core/kernelRequestManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,9 @@ kernelRequestManager_t::compile()
const std::string fileName = kernelRequest.fileName;
const std::string suffix = kernelRequest.suffix;
const occa::properties props = kernelRequest.props;
auto kernel = device.buildKernel(fileName, props, suffix);

// MPI staging already handled
auto kernel = device.buildKernel(fileName, props, suffix, false);
requestToKernel[requestName] = kernel;
}
}
Expand All @@ -130,7 +132,9 @@ kernelRequestManager_t::compile()
const std::string fileName = kernelRequest.fileName;
const std::string suffix = kernelRequest.suffix;
const occa::properties props = kernelRequest.props;
auto kernel = device.buildKernel(fileName, props, suffix);

// MPI staging already handled
auto kernel = device.buildKernel(fileName, props, suffix, false);
requestToKernel[requestName] = kernel;
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/core/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,11 @@ platform_t::platform_t(setupAide& _options, MPI_Comm _commg, MPI_Comm _comm)
const std::string oklpath = installDir + "/okl/";
kernelName = "copyDfloatToPfloat";
fileName = installDir + "/okl/core/" + kernelName + extension;
this->copyDfloatToPfloatKernel = this->device.buildKernel(fileName, this->kernelInfo);
this->kernels.add(kernelName, fileName, this->kernelInfo);

kernelName = "copyPfloatToDfloat";
fileName = installDir + "/okl/core/" + kernelName + extension;
this->copyPfloatToDfloatKernel = this->device.buildKernel(fileName, this->kernelInfo);
this->kernels.add(kernelName, fileName, this->kernelInfo);
}
void memPool_t::allocate(const dlong offset, const dlong fields)
{
Expand Down
10 changes: 5 additions & 5 deletions src/plugins/RANSktau.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,23 +73,23 @@ void RANSktau::buildKernel(occa::properties kernelInfo)
{
kernelName = "computeHex3D";
fileName = path + kernelName + extension;
computeKernel = platform->device.buildKernel(fileName, kernelInfo);
computeKernel = platform->device.buildKernel(fileName, kernelInfo, true);

kernelName = "SijOijHex3D";
fileName = path + kernelName + extension;
SijOijKernel = platform->device.buildKernel(fileName, kernelInfo);
SijOijKernel = platform->device.buildKernel(fileName, kernelInfo, true);

kernelName = "SijOijMag2";
fileName = path + kernelName + extension;
SijOijMag2Kernel = platform->device.buildKernel(fileName, kernelInfo);
SijOijMag2Kernel = platform->device.buildKernel(fileName, kernelInfo, true);

kernelName = "limit";
fileName = path + kernelName + extension;
limitKernel = platform->device.buildKernel(fileName, kernelInfo);
limitKernel = platform->device.buildKernel(fileName, kernelInfo, true);

kernelName = "mue";
fileName = path + kernelName + extension;
mueKernel = platform->device.buildKernel(fileName, kernelInfo);
mueKernel = platform->device.buildKernel(fileName, kernelInfo, true);
}

int Nscalar;
Expand Down
6 changes: 3 additions & 3 deletions src/plugins/avg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,15 @@ void avg::buildKernel(occa::properties kernelInfo)
{
kernelName = "EX";
fileName = path + kernelName + extension;
EXKernel = platform->device.buildKernel(fileName, kernelInfo);
EXKernel = platform->device.buildKernel(fileName, kernelInfo, true);

kernelName = "EXX";
fileName = path + kernelName + extension;
EXXKernel = platform->device.buildKernel(fileName, kernelInfo);
EXXKernel = platform->device.buildKernel(fileName, kernelInfo, true);

kernelName = "EXY";
fileName = path + kernelName + extension;
EXYKernel = platform->device.buildKernel(fileName, kernelInfo);
EXYKernel = platform->device.buildKernel(fileName, kernelInfo, true);
}
buildKernelCalled = 1;
}
Expand Down
6 changes: 3 additions & 3 deletions src/plugins/lowMach.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,15 @@ void lowMach::buildKernel(occa::properties kernelInfo)
{
kernelName = "qtlHex3D";
fileName = path + kernelName + extension;
qtlKernel = platform->device.buildKernel(fileName, kernelInfo);
qtlKernel = platform->device.buildKernel(fileName, kernelInfo, true);

kernelName = "p0thHelper";
fileName = path + kernelName + extension;
p0thHelperKernel = platform->device.buildKernel(fileName, kernelInfo);
p0thHelperKernel = platform->device.buildKernel(fileName, kernelInfo, true);

kernelName = "surfaceFlux";
fileName = path + kernelName + extension;
surfaceFluxKernel = platform->device.buildKernel(fileName, kernelInfo);
surfaceFluxKernel = platform->device.buildKernel(fileName, kernelInfo, true);
}
}

Expand Down
6 changes: 3 additions & 3 deletions src/plugins/velRecycling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,16 @@ void velRecycling::buildKernel(occa::properties kernelInfo)
{
kernelName = "setBCVectorValue";
fileName = path + kernelName + extension;
setBCVectorValueKernel = platform->device.buildKernel(fileName, kernelInfo);
setBCVectorValueKernel = platform->device.buildKernel(fileName, kernelInfo, true);

kernelName = "getBCFlux";
fileName = path + kernelName + extension;
getBCFluxKernel = platform->device.buildKernel(fileName, kernelInfo);
getBCFluxKernel = platform->device.buildKernel(fileName, kernelInfo, true);


kernelName = "sumReduction";
fileName = path + kernelName + extension;
sumReductionKernel = platform->device.buildKernel(fileName, kernelInfo);
sumReductionKernel = platform->device.buildKernel(fileName, kernelInfo, true);
}
}

Expand Down
35 changes: 11 additions & 24 deletions src/udf/compileUDFKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,37 +16,24 @@ occa::properties compileUDFKernels()
kernelInfo["flags"].asObject();
kernelInfo["include_paths"].asArray();

auto rank = buildNodeLocal ? platform->comm.localRank : platform->comm.mpiRank;
auto communicator = buildNodeLocal ? platform->comm.mpiCommLocal : platform->comm.mpiComm;

MPI_Barrier(platform->comm.mpiComm);
const double tStart = MPI_Wtime();
if (platform->comm.mpiRank == 0)
printf("loading udf kernels ... ");
fflush(stdout);

occa::properties kernelInfoBC;

for(int pass = 0; pass < 2; ++pass)
{
bool executePass = (pass == 0) && (rank == 0);
executePass |= (pass == 1) && (rank != 0);
if(executePass){
kernelInfoBC = kernelInfo;
if (udf.loadKernels) {
// side-effect: kernelInfoBC will include any relevant user-defined kernel props
udf.loadKernels(kernelInfoBC);
}
const std::string bcDataFile = installDir + "/include/core/bcData.h";
kernelInfoBC["includes"] += bcDataFile.c_str();
std::string boundaryHeaderFileName;
platform->options.getArgs("DATA FILE", boundaryHeaderFileName);
kernelInfoBC["includes"] += realpath(boundaryHeaderFileName.c_str(), NULL);

kernelInfoBC += meshKernelProperties(N);
}
MPI_Barrier(communicator);
occa::properties kernelInfoBC = kernelInfo;
if (udf.loadKernels) {
// side-effect: kernelInfoBC will include any relevant user-defined kernel props
udf.loadKernels(kernelInfoBC);
}
const std::string bcDataFile = installDir + "/include/core/bcData.h";
kernelInfoBC["includes"] += bcDataFile.c_str();
std::string boundaryHeaderFileName;
platform->options.getArgs("DATA FILE", boundaryHeaderFileName);
kernelInfoBC["includes"] += realpath(boundaryHeaderFileName.c_str(), NULL);

kernelInfoBC += meshKernelProperties(N);

MPI_Barrier(platform->comm.mpiComm);
const double loadTime = MPI_Wtime() - tStart;
Expand Down

0 comments on commit 8ca79db

Please sign in to comment.