From 6dbfb411544a7a6bdd33f391c97c69cd9e1f444a Mon Sep 17 00:00:00 2001 From: nbeams <246972+nbeams@users.noreply.github.com> Date: Mon, 5 Apr 2021 18:09:41 -0400 Subject: [PATCH] Update device ID selection for HIP/CUDA backends; add for MAGMA backends --- README.rst | 5 +++ backends/cuda-gen/ceed-cuda-gen.c | 10 ++--- backends/cuda-gen/ceed-cuda-gen.h | 4 -- backends/cuda-shared/ceed-cuda-shared-basis.c | 2 +- backends/cuda-shared/ceed-cuda-shared.c | 11 +++--- backends/cuda-shared/ceed-cuda-shared.h | 4 -- backends/cuda/ceed-cuda.c | 14 +++---- backends/hip-gen/ceed-hip-gen.c | 10 ++--- backends/hip-gen/ceed-hip-gen.h | 4 -- backends/hip-shared/ceed-hip-shared-basis.c | 2 +- backends/hip-shared/ceed-hip-shared.c | 10 ++--- backends/hip-shared/ceed-hip-shared.h | 4 -- backends/hip/ceed-hip.c | 14 ++++--- backends/magma/ceed-magma-det.c | 24 +++++++++++- backends/magma/ceed-magma-restriction.c | 1 + backends/magma/ceed-magma.c | 37 +++++++++++++------ 16 files changed, 90 insertions(+), 66 deletions(-) diff --git a/README.rst b/README.rst index 6b7a34c652..ecf425f90b 100644 --- a/README.rst +++ b/README.rst @@ -265,6 +265,11 @@ Currently, each MAGMA library installation is only built for either CUDA or HIP. set of libCEED backends (``/gpu/cuda/magma/*`` or ``/gpu/hip/magma/*``) will automatically be built for the version of the MAGMA library found in ``MAGMA_DIR``. +Users can specify a device for all CUDA, HIP, and MAGMA backends through adding `:device_id=#` +after the resource name. For example: + + - `/gpu/cuda/gen:device_id=1` + The ``/*/occa`` backends rely upon the `OCCA `_ package to provide cross platform performance. 
To enable the OCCA backend, the environment variable ``OCCA_DIR`` must point to the top-level OCCA directory, with the OCCA library located in the ``${OCCA_DIR}/lib`` (By default, diff --git a/backends/cuda-gen/ceed-cuda-gen.c b/backends/cuda-gen/ceed-cuda-gen.c index 4c2a74351b..2b618b83cf 100644 --- a/backends/cuda-gen/ceed-cuda-gen.c +++ b/backends/cuda-gen/ceed-cuda-gen.c @@ -32,15 +32,15 @@ static int CeedInit_Cuda_gen(const char *resource, Ceed ceed) { "Cuda backend cannot use resource: %s", resource); // LCOV_EXCL_STOP - Ceed ceedshared; - CeedInit("/gpu/cuda/shared", &ceedshared); - ierr = CeedSetDelegate(ceed, ceedshared); CeedChkBackend(ierr); - - Ceed_Cuda_gen *data; + Ceed_Cuda *data; ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); ierr = CeedSetData(ceed, data); CeedChkBackend(ierr); ierr = CeedCudaInit(ceed, resource, nrc); CeedChkBackend(ierr); + Ceed ceedshared; + CeedInit("/gpu/cuda/shared", &ceedshared); + ierr = CeedSetDelegate(ceed, ceedshared); CeedChkBackend(ierr); + const char fallbackresource[] = "/gpu/cuda/ref"; ierr = CeedSetOperatorFallbackResource(ceed, fallbackresource); CeedChkBackend(ierr); diff --git a/backends/cuda-gen/ceed-cuda-gen.h b/backends/cuda-gen/ceed-cuda-gen.h index 32db599298..e490296b86 100644 --- a/backends/cuda-gen/ceed-cuda-gen.h +++ b/backends/cuda-gen/ceed-cuda-gen.h @@ -44,10 +44,6 @@ typedef struct { void *d_c; } CeedQFunction_Cuda_gen; -typedef struct { - Ceed_Cuda base; -} Ceed_Cuda_gen; - CEED_INTERN int CeedQFunctionCreate_Cuda_gen(CeedQFunction qf); CEED_INTERN int CeedOperatorCreate_Cuda_gen(CeedOperator op); diff --git a/backends/cuda-shared/ceed-cuda-shared-basis.c b/backends/cuda-shared/ceed-cuda-shared-basis.c index 9a10a0fa25..13d0584b87 100644 --- a/backends/cuda-shared/ceed-cuda-shared-basis.c +++ b/backends/cuda-shared/ceed-cuda-shared-basis.c @@ -773,7 +773,7 @@ int CeedBasisApplyTensor_Cuda_shared(CeedBasis basis, const CeedInt nelem, int ierr; Ceed ceed; ierr = CeedBasisGetCeed(basis, &ceed); 
CeedChkBackend(ierr); - Ceed_Cuda_shared *ceed_Cuda; + Ceed_Cuda *ceed_Cuda; CeedGetData(ceed, &ceed_Cuda); CeedChkBackend(ierr); CeedBasis_Cuda_shared *data; CeedBasisGetData(basis, &data); CeedChkBackend(ierr); diff --git a/backends/cuda-shared/ceed-cuda-shared.c b/backends/cuda-shared/ceed-cuda-shared.c index 25bf0da2e9..cdc8447708 100644 --- a/backends/cuda-shared/ceed-cuda-shared.c +++ b/backends/cuda-shared/ceed-cuda-shared.c @@ -16,7 +16,6 @@ #include #include -#include #include #include "ceed-cuda-shared.h" #include "../cuda/ceed-cuda.h" @@ -34,15 +33,15 @@ static int CeedInit_Cuda_shared(const char *resource, Ceed ceed) { // LCOV_EXCL_STOP ierr = CeedSetDeterministic(ceed, true); CeedChk(ierr); - Ceed ceedref; - CeedInit("/gpu/cuda/ref", &ceedref); - ierr = CeedSetDelegate(ceed, ceedref); CeedChk(ierr); - - Ceed_Cuda_shared *data; + Ceed_Cuda *data; ierr = CeedCalloc(1, &data); CeedChk(ierr); ierr = CeedSetData(ceed, data); CeedChk(ierr); ierr = CeedCudaInit(ceed, resource, nrc); CeedChk(ierr); + Ceed ceedref; + CeedInit("/gpu/cuda/ref", &ceedref); + ierr = CeedSetDelegate(ceed, ceedref); CeedChk(ierr); + ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "BasisCreateTensorH1", CeedBasisCreateTensorH1_Cuda_shared); CeedChk(ierr); diff --git a/backends/cuda-shared/ceed-cuda-shared.h b/backends/cuda-shared/ceed-cuda-shared.h index e0a1a590a1..1a3cd1b02b 100644 --- a/backends/cuda-shared/ceed-cuda-shared.h +++ b/backends/cuda-shared/ceed-cuda-shared.h @@ -35,10 +35,6 @@ typedef struct { CeedScalar *c_G; } CeedBasis_Cuda_shared; -typedef struct { - Ceed_Cuda base; -} Ceed_Cuda_shared; - CEED_INTERN int CeedBasisCreateTensorH1_Cuda_shared(CeedInt dim, CeedInt P1d, CeedInt Q1d, const CeedScalar *interp1d, const CeedScalar *grad1d, const CeedScalar *qref1d, const CeedScalar *qweight1d, CeedBasis basis); diff --git a/backends/cuda/ceed-cuda.c b/backends/cuda/ceed-cuda.c index 42f65a913f..4f03072693 100644 --- a/backends/cuda/ceed-cuda.c +++ 
b/backends/cuda/ceed-cuda.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -150,22 +149,22 @@ static int CeedGetPreferredMemType_Cuda(CeedMemType *type) { //------------------------------------------------------------------------------ int CeedCudaInit(Ceed ceed, const char *resource, int nrc) { int ierr; - const int rlen = strlen(resource); - const bool slash = (rlen>nrc) ? (resource[nrc] == '/') : false; - const int deviceID = (slash && rlen > nrc + 1) ? atoi(&resource[nrc + 1]) : -1; + const char *device_spec = strstr(resource, ":device_id="); + const int deviceID = (device_spec) ? atoi(device_spec+11) : -1; int currentDeviceID; ierr = cudaGetDevice(&currentDeviceID); CeedChk_Cu(ceed,ierr); if (deviceID >= 0 && currentDeviceID != deviceID) { ierr = cudaSetDevice(deviceID); CeedChk_Cu(ceed,ierr); + currentDeviceID = deviceID; } - struct cudaDeviceProp deviceProp; - ierr = cudaGetDeviceProperties(&deviceProp, deviceID); CeedChk_Cu(ceed,ierr); + ierr = cudaGetDeviceProperties(&deviceProp, currentDeviceID); + CeedChk_Cu(ceed,ierr); Ceed_Cuda *data; ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); - data->deviceId = deviceID; + data->deviceId = currentDeviceID; data->optblocksize = deviceProp.maxThreadsPerBlock; return CEED_ERROR_SUCCESS; } @@ -210,6 +209,7 @@ static int CeedInit_Cuda(const char *resource, Ceed ceed) { return CeedError(ceed, CEED_ERROR_BACKEND, "Cuda backend cannot use resource: %s", resource); // LCOV_EXCL_STOP + ierr = CeedSetDeterministic(ceed, true); CeedChk(ierr); Ceed_Cuda *data; ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); diff --git a/backends/hip-gen/ceed-hip-gen.c b/backends/hip-gen/ceed-hip-gen.c index b0ad0b5786..3dea15297f 100644 --- a/backends/hip-gen/ceed-hip-gen.c +++ b/backends/hip-gen/ceed-hip-gen.c @@ -32,15 +32,15 @@ static int CeedInit_Hip_gen(const char *resource, Ceed ceed) { "Hip backend cannot use resource: %s", resource); // LCOV_EXCL_STOP - Ceed ceedshared; - 
CeedInit("/gpu/hip/shared", &ceedshared); - ierr = CeedSetDelegate(ceed, ceedshared); CeedChkBackend(ierr); - - Ceed_Hip_gen *data; + Ceed_Hip *data; ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); ierr = CeedSetData(ceed, data); CeedChkBackend(ierr); ierr = CeedHipInit(ceed, resource, nrc); CeedChkBackend(ierr); + Ceed ceedshared; + CeedInit("/gpu/hip/shared", &ceedshared); + ierr = CeedSetDelegate(ceed, ceedshared); CeedChkBackend(ierr); + const char fallbackresource[] = "/gpu/hip/ref"; ierr = CeedSetOperatorFallbackResource(ceed, fallbackresource); CeedChkBackend(ierr); diff --git a/backends/hip-gen/ceed-hip-gen.h b/backends/hip-gen/ceed-hip-gen.h index b6b2754a24..bc7360bb69 100644 --- a/backends/hip-gen/ceed-hip-gen.h +++ b/backends/hip-gen/ceed-hip-gen.h @@ -44,10 +44,6 @@ typedef struct { void *d_c; } CeedQFunction_Hip_gen; -typedef struct { - Ceed_Hip base; -} Ceed_Hip_gen; - CEED_INTERN int CeedQFunctionCreate_Hip_gen(CeedQFunction qf); CEED_INTERN int CeedOperatorCreate_Hip_gen(CeedOperator op); diff --git a/backends/hip-shared/ceed-hip-shared-basis.c b/backends/hip-shared/ceed-hip-shared-basis.c index bdc6586b3b..e1dcdb17f1 100644 --- a/backends/hip-shared/ceed-hip-shared-basis.c +++ b/backends/hip-shared/ceed-hip-shared-basis.c @@ -859,7 +859,7 @@ int CeedBasisApplyTensor_Hip_shared(CeedBasis basis, const CeedInt nelem, int ierr; Ceed ceed; ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); - Ceed_Hip_shared *ceed_Hip; + Ceed_Hip *ceed_Hip; CeedGetData(ceed, &ceed_Hip); CeedChkBackend(ierr); CeedBasis_Hip_shared *data; CeedBasisGetData(basis, &data); CeedChkBackend(ierr); diff --git a/backends/hip-shared/ceed-hip-shared.c b/backends/hip-shared/ceed-hip-shared.c index 0f7bde94c6..46341a4850 100644 --- a/backends/hip-shared/ceed-hip-shared.c +++ b/backends/hip-shared/ceed-hip-shared.c @@ -34,15 +34,15 @@ static int CeedInit_Hip_shared(const char *resource, Ceed ceed) { // LCOV_EXCL_STOP ierr = CeedSetDeterministic(ceed, true); 
CeedChkBackend(ierr); - Ceed ceedref; - CeedInit("/gpu/hip/ref", &ceedref); - ierr = CeedSetDelegate(ceed, ceedref); CeedChkBackend(ierr); - - Ceed_Hip_shared *data; + Ceed_Hip *data; ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); ierr = CeedSetData(ceed, data); CeedChkBackend(ierr); ierr = CeedHipInit(ceed, resource, nrc); CeedChkBackend(ierr); + Ceed ceedref; + CeedInit("/gpu/hip/ref", &ceedref); + ierr = CeedSetDelegate(ceed, ceedref); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "BasisCreateTensorH1", CeedBasisCreateTensorH1_Hip_shared); CeedChkBackend(ierr); diff --git a/backends/hip-shared/ceed-hip-shared.h b/backends/hip-shared/ceed-hip-shared.h index 9fecf8a144..20ad33d93d 100644 --- a/backends/hip-shared/ceed-hip-shared.h +++ b/backends/hip-shared/ceed-hip-shared.h @@ -34,10 +34,6 @@ typedef struct { CeedScalar *d_qweight1d; } CeedBasis_Hip_shared; -typedef struct { - Ceed_Hip base; -} Ceed_Hip_shared; - CEED_INTERN int CeedBasisCreateTensorH1_Hip_shared(CeedInt dim, CeedInt P1d, CeedInt Q1d, const CeedScalar *interp1d, const CeedScalar *grad1d, const CeedScalar *qref1d, const CeedScalar *qweight1d, CeedBasis basis); diff --git a/backends/hip/ceed-hip.c b/backends/hip/ceed-hip.c index 4a937b22ad..b38cd42079 100644 --- a/backends/hip/ceed-hip.c +++ b/backends/hip/ceed-hip.c @@ -16,8 +16,8 @@ #include #include -#include #include +#include #include "ceed-hip.h" //------------------------------------------------------------------------------ @@ -33,22 +33,23 @@ static int CeedGetPreferredMemType_Hip(CeedMemType *type) { //------------------------------------------------------------------------------ int CeedHipInit(Ceed ceed, const char *resource, int nrc) { int ierr; - const int rlen = strlen(resource); - const bool slash = (rlen>nrc) ? (resource[nrc] == '/') : false; - const int deviceID = (slash && rlen > nrc + 1) ? 
atoi(&resource[nrc + 1]) : -1; + const char *device_spec = strstr(resource, ":device_id="); + const int deviceID = (device_spec) ? atoi(device_spec+11) : -1; int currentDeviceID; ierr = hipGetDevice(&currentDeviceID); CeedChk_Hip(ceed,ierr); if (deviceID >= 0 && currentDeviceID != deviceID) { ierr = hipSetDevice(deviceID); CeedChk_Hip(ceed,ierr); + currentDeviceID = deviceID; } struct hipDeviceProp_t deviceProp; - ierr = hipGetDeviceProperties(&deviceProp, deviceID); CeedChk_Hip(ceed,ierr); + ierr = hipGetDeviceProperties(&deviceProp, currentDeviceID); + CeedChk_Hip(ceed,ierr); Ceed_Hip *data; ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); - data->deviceId = deviceID; + data->deviceId = currentDeviceID; data->optblocksize = 256; return CEED_ERROR_SUCCESS; } @@ -93,6 +94,7 @@ static int CeedInit_Hip(const char *resource, Ceed ceed) { return CeedError(ceed, CEED_ERROR_BACKEND, "Hip backend cannot use resource: %s", resource); // LCOV_EXCL_STOP + ierr = CeedSetDeterministic(ceed, true); CeedChk(ierr); Ceed_Hip *data; ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); diff --git a/backends/magma/ceed-magma-det.c b/backends/magma/ceed-magma-det.c index 72ed2c8aa8..6cb1eb7361 100644 --- a/backends/magma/ceed-magma-det.c +++ b/backends/magma/ceed-magma-det.c @@ -16,18 +16,38 @@ #include #include +#include +#include #include "ceed-magma.h" CEED_INTERN int CeedInit_Magma_Det(const char *resource, Ceed ceed) { int ierr; - if (strcmp(resource, "/gpu/cuda/magma/det") - && strcmp(resource, "/gpu/hip/magma/det")) + const int nrc = 18; // number of characters in resource + if (strncmp(resource, "/gpu/cuda/magma/det", nrc) + && strncmp(resource, "/gpu/hip/magma/det", nrc)) // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_BACKEND, "Magma backend cannot use resource: %s", resource); // LCOV_EXCL_STOP ierr = CeedSetDeterministic(ceed, true); CeedChkBackend(ierr); + Ceed_Magma *data; + ierr = CeedCalloc(sizeof(Ceed_Magma), &data); CeedChkBackend(ierr); + ierr = CeedSetData(ceed, 
data); CeedChkBackend(ierr); + + // get/set device ID + const char *device_spec = strstr(resource, ":device_id="); + const int deviceID = (device_spec) ? atoi(device_spec+11) : -1; + + int currentDeviceID; + magma_getdevice(&currentDeviceID); + if (deviceID >= 0 && currentDeviceID != deviceID) { + magma_setdevice(deviceID); + currentDeviceID = deviceID; + } + // create a queue that uses the null stream + data->device = currentDeviceID; + // Create reference CEED that implementation will be dispatched // through unless overridden Ceed ceedref; diff --git a/backends/magma/ceed-magma-restriction.c b/backends/magma/ceed-magma-restriction.c index 5f3e7fe3fe..35f12f43ee 100644 --- a/backends/magma/ceed-magma-restriction.c +++ b/backends/magma/ceed-magma-restriction.c @@ -16,6 +16,7 @@ #include #include +#include #include "ceed-magma.h" static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, diff --git a/backends/magma/ceed-magma.c b/backends/magma/ceed-magma.c index 1a562cdb6a..fbc79c177f 100644 --- a/backends/magma/ceed-magma.c +++ b/backends/magma/ceed-magma.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "ceed-magma.h" static int CeedDestroy_Magma(Ceed ceed) { @@ -30,22 +31,14 @@ static int CeedDestroy_Magma(Ceed ceed) { static int CeedInit_Magma(const char *resource, Ceed ceed) { int ierr; - if (strcmp(resource, "/gpu/cuda/magma") && strcmp(resource, "/gpu/hip/magma")) + const int nrc = 14; // number of characters in resource + if (strncmp(resource, "/gpu/cuda/magma", nrc) + && strncmp(resource, "/gpu/hip/magma", nrc)) // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_BACKEND, "Magma backend cannot use resource: %s", resource); // LCOV_EXCL_STOP - // Create reference CEED that implementation will be dispatched - // through unless overridden - Ceed ceedref; - #ifdef HAVE_HIP - CeedInit("/gpu/hip/ref", &ceedref); - #else - CeedInit("/gpu/cuda/ref", &ceedref); - #endif - ierr = CeedSetDelegate(ceed, ceedref); CeedChkBackend(ierr); - ierr = 
magma_init(); if (ierr) // LCOV_EXCL_START @@ -64,14 +57,34 @@ static int CeedInit_Magma(const char *resource, Ceed ceed) { data->maxthreads[1] = 128; // for 2D kernels data->maxthreads[2] = 64; // for 3D kernels + // get/set device ID + const char *device_spec = strstr(resource, ":device_id="); + const int deviceID = (device_spec) ? atoi(device_spec+11) : -1; + + int currentDeviceID; + magma_getdevice(&currentDeviceID); + if (deviceID >= 0 && currentDeviceID != deviceID) { + magma_setdevice(deviceID); + currentDeviceID = deviceID; + } // create a queue that uses the null stream - magma_getdevice( &(data->device) ); + data->device = currentDeviceID; #ifdef HAVE_HIP magma_queue_create_from_hip(data->device, NULL, NULL, NULL, &(data->queue)); #else magma_queue_create_from_cuda(data->device, NULL, NULL, NULL, &(data->queue)); #endif + // Create reference CEED that implementation will be dispatched + // through unless overridden + Ceed ceedref; + #ifdef HAVE_HIP + CeedInit("/gpu/hip/ref", &ceedref); + #else + CeedInit("/gpu/cuda/ref", &ceedref); + #endif + ierr = CeedSetDelegate(ceed, ceedref); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "ElemRestrictionCreate", CeedElemRestrictionCreate_Magma); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Ceed", ceed,