Skip to content

Commit

Permalink
Merge pull request #740 from CEED/natalie/device-id
Browse files Browse the repository at this point in the history
Update device ID selection for HIP/CUDA/MAGMA backends
  • Loading branch information
jeremylt authored Apr 15, 2021
2 parents b761d2c + 6dbfb41 commit ebc204c
Show file tree
Hide file tree
Showing 16 changed files with 90 additions and 66 deletions.
5 changes: 5 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,11 @@ Currently, each MAGMA library installation is only built for either CUDA or HIP.
set of libCEED backends (``/gpu/cuda/magma/*`` or ``/gpu/hip/magma/*``) will automatically be built
for the version of the MAGMA library found in ``MAGMA_DIR``.

Users can select a specific device for any CUDA, HIP, or MAGMA backend by appending ``:device_id=#``
to the resource name, where ``#`` is the integer ID of the device. For example:

- ``/gpu/cuda/gen:device_id=1``

The ``/*/occa`` backends rely upon the `OCCA <http://github.com/libocca/occa>`_ package to provide
cross platform performance. To enable the OCCA backend, the environment variable ``OCCA_DIR`` must point
to the top-level OCCA directory, with the OCCA library located in the ``${OCCA_DIR}/lib`` (By default,
Expand Down
10 changes: 5 additions & 5 deletions backends/cuda-gen/ceed-cuda-gen.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,15 @@ static int CeedInit_Cuda_gen(const char *resource, Ceed ceed) {
"Cuda backend cannot use resource: %s", resource);
// LCOV_EXCL_STOP

Ceed ceedshared;
CeedInit("/gpu/cuda/shared", &ceedshared);
ierr = CeedSetDelegate(ceed, ceedshared); CeedChkBackend(ierr);

Ceed_Cuda_gen *data;
Ceed_Cuda *data;
ierr = CeedCalloc(1, &data); CeedChkBackend(ierr);
ierr = CeedSetData(ceed, data); CeedChkBackend(ierr);
ierr = CeedCudaInit(ceed, resource, nrc); CeedChkBackend(ierr);

Ceed ceedshared;
CeedInit("/gpu/cuda/shared", &ceedshared);
ierr = CeedSetDelegate(ceed, ceedshared); CeedChkBackend(ierr);

const char fallbackresource[] = "/gpu/cuda/ref";
ierr = CeedSetOperatorFallbackResource(ceed, fallbackresource);
CeedChkBackend(ierr);
Expand Down
4 changes: 0 additions & 4 deletions backends/cuda-gen/ceed-cuda-gen.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,6 @@ typedef struct {
void *d_c;
} CeedQFunction_Cuda_gen;

typedef struct {
Ceed_Cuda base;
} Ceed_Cuda_gen;

CEED_INTERN int CeedQFunctionCreate_Cuda_gen(CeedQFunction qf);

CEED_INTERN int CeedOperatorCreate_Cuda_gen(CeedOperator op);
Expand Down
2 changes: 1 addition & 1 deletion backends/cuda-shared/ceed-cuda-shared-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -773,7 +773,7 @@ int CeedBasisApplyTensor_Cuda_shared(CeedBasis basis, const CeedInt nelem,
int ierr;
Ceed ceed;
ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
Ceed_Cuda_shared *ceed_Cuda;
Ceed_Cuda *ceed_Cuda;
CeedGetData(ceed, &ceed_Cuda); CeedChkBackend(ierr);
CeedBasis_Cuda_shared *data;
CeedBasisGetData(basis, &data); CeedChkBackend(ierr);
Expand Down
11 changes: 5 additions & 6 deletions backends/cuda-shared/ceed-cuda-shared.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

#include <ceed/ceed.h>
#include <ceed/backend.h>
#include <stdbool.h>
#include <string.h>
#include "ceed-cuda-shared.h"
#include "../cuda/ceed-cuda.h"
Expand All @@ -34,15 +33,15 @@ static int CeedInit_Cuda_shared(const char *resource, Ceed ceed) {
// LCOV_EXCL_STOP
ierr = CeedSetDeterministic(ceed, true); CeedChk(ierr);

Ceed ceedref;
CeedInit("/gpu/cuda/ref", &ceedref);
ierr = CeedSetDelegate(ceed, ceedref); CeedChk(ierr);

Ceed_Cuda_shared *data;
Ceed_Cuda *data;
ierr = CeedCalloc(1, &data); CeedChk(ierr);
ierr = CeedSetData(ceed, data); CeedChk(ierr);
ierr = CeedCudaInit(ceed, resource, nrc); CeedChk(ierr);

Ceed ceedref;
CeedInit("/gpu/cuda/ref", &ceedref);
ierr = CeedSetDelegate(ceed, ceedref); CeedChk(ierr);

ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "BasisCreateTensorH1",
CeedBasisCreateTensorH1_Cuda_shared);
CeedChk(ierr);
Expand Down
4 changes: 0 additions & 4 deletions backends/cuda-shared/ceed-cuda-shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,6 @@ typedef struct {
CeedScalar *c_G;
} CeedBasis_Cuda_shared;

typedef struct {
Ceed_Cuda base;
} Ceed_Cuda_shared;

CEED_INTERN int CeedBasisCreateTensorH1_Cuda_shared(CeedInt dim, CeedInt P1d,
CeedInt Q1d, const CeedScalar *interp1d, const CeedScalar *grad1d,
const CeedScalar *qref1d, const CeedScalar *qweight1d, CeedBasis basis);
Expand Down
14 changes: 7 additions & 7 deletions backends/cuda/ceed-cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
#include <cuda_runtime.h>
#include <nvrtc.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Expand Down Expand Up @@ -150,22 +149,22 @@ static int CeedGetPreferredMemType_Cuda(CeedMemType *type) {
//------------------------------------------------------------------------------
int CeedCudaInit(Ceed ceed, const char *resource, int nrc) {
int ierr;
const int rlen = strlen(resource);
const bool slash = (rlen>nrc) ? (resource[nrc] == '/') : false;
const int deviceID = (slash && rlen > nrc + 1) ? atoi(&resource[nrc + 1]) : -1;
const char *device_spec = strstr(resource, ":device_id=");
const int deviceID = (device_spec) ? atoi(device_spec+11) : -1;

int currentDeviceID;
ierr = cudaGetDevice(&currentDeviceID); CeedChk_Cu(ceed,ierr);
if (deviceID >= 0 && currentDeviceID != deviceID) {
ierr = cudaSetDevice(deviceID); CeedChk_Cu(ceed,ierr);
currentDeviceID = deviceID;
}

struct cudaDeviceProp deviceProp;
ierr = cudaGetDeviceProperties(&deviceProp, deviceID); CeedChk_Cu(ceed,ierr);
ierr = cudaGetDeviceProperties(&deviceProp, currentDeviceID);
CeedChk_Cu(ceed,ierr);

Ceed_Cuda *data;
ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr);
data->deviceId = deviceID;
data->deviceId = currentDeviceID;
data->optblocksize = deviceProp.maxThreadsPerBlock;
return CEED_ERROR_SUCCESS;
}
Expand Down Expand Up @@ -210,6 +209,7 @@ static int CeedInit_Cuda(const char *resource, Ceed ceed) {
return CeedError(ceed, CEED_ERROR_BACKEND,
"Cuda backend cannot use resource: %s", resource);
// LCOV_EXCL_STOP
ierr = CeedSetDeterministic(ceed, true); CeedChk(ierr);

Ceed_Cuda *data;
ierr = CeedCalloc(1, &data); CeedChkBackend(ierr);
Expand Down
10 changes: 5 additions & 5 deletions backends/hip-gen/ceed-hip-gen.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,15 @@ static int CeedInit_Hip_gen(const char *resource, Ceed ceed) {
"Hip backend cannot use resource: %s", resource);
// LCOV_EXCL_STOP

Ceed ceedshared;
CeedInit("/gpu/hip/shared", &ceedshared);
ierr = CeedSetDelegate(ceed, ceedshared); CeedChkBackend(ierr);

Ceed_Hip_gen *data;
Ceed_Hip *data;
ierr = CeedCalloc(1, &data); CeedChkBackend(ierr);
ierr = CeedSetData(ceed, data); CeedChkBackend(ierr);
ierr = CeedHipInit(ceed, resource, nrc); CeedChkBackend(ierr);

Ceed ceedshared;
CeedInit("/gpu/hip/shared", &ceedshared);
ierr = CeedSetDelegate(ceed, ceedshared); CeedChkBackend(ierr);

const char fallbackresource[] = "/gpu/hip/ref";
ierr = CeedSetOperatorFallbackResource(ceed, fallbackresource);
CeedChkBackend(ierr);
Expand Down
4 changes: 0 additions & 4 deletions backends/hip-gen/ceed-hip-gen.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,6 @@ typedef struct {
void *d_c;
} CeedQFunction_Hip_gen;

typedef struct {
Ceed_Hip base;
} Ceed_Hip_gen;

CEED_INTERN int CeedQFunctionCreate_Hip_gen(CeedQFunction qf);

CEED_INTERN int CeedOperatorCreate_Hip_gen(CeedOperator op);
Expand Down
2 changes: 1 addition & 1 deletion backends/hip-shared/ceed-hip-shared-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -859,7 +859,7 @@ int CeedBasisApplyTensor_Hip_shared(CeedBasis basis, const CeedInt nelem,
int ierr;
Ceed ceed;
ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
Ceed_Hip_shared *ceed_Hip;
Ceed_Hip *ceed_Hip;
CeedGetData(ceed, &ceed_Hip); CeedChkBackend(ierr);
CeedBasis_Hip_shared *data;
CeedBasisGetData(basis, &data); CeedChkBackend(ierr);
Expand Down
10 changes: 5 additions & 5 deletions backends/hip-shared/ceed-hip-shared.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,15 @@ static int CeedInit_Hip_shared(const char *resource, Ceed ceed) {
// LCOV_EXCL_STOP
ierr = CeedSetDeterministic(ceed, true); CeedChkBackend(ierr);

Ceed ceedref;
CeedInit("/gpu/hip/ref", &ceedref);
ierr = CeedSetDelegate(ceed, ceedref); CeedChkBackend(ierr);

Ceed_Hip_shared *data;
Ceed_Hip *data;
ierr = CeedCalloc(1, &data); CeedChkBackend(ierr);
ierr = CeedSetData(ceed, data); CeedChkBackend(ierr);
ierr = CeedHipInit(ceed, resource, nrc); CeedChkBackend(ierr);

Ceed ceedref;
CeedInit("/gpu/hip/ref", &ceedref);
ierr = CeedSetDelegate(ceed, ceedref); CeedChkBackend(ierr);

ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "BasisCreateTensorH1",
CeedBasisCreateTensorH1_Hip_shared);
CeedChkBackend(ierr);
Expand Down
4 changes: 0 additions & 4 deletions backends/hip-shared/ceed-hip-shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,6 @@ typedef struct {
CeedScalar *d_qweight1d;
} CeedBasis_Hip_shared;

typedef struct {
Ceed_Hip base;
} Ceed_Hip_shared;

CEED_INTERN int CeedBasisCreateTensorH1_Hip_shared(CeedInt dim, CeedInt P1d,
CeedInt Q1d, const CeedScalar *interp1d, const CeedScalar *grad1d,
const CeedScalar *qref1d, const CeedScalar *qweight1d, CeedBasis basis);
Expand Down
14 changes: 8 additions & 6 deletions backends/hip/ceed-hip.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

#include <ceed/ceed.h>
#include <ceed/backend.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#include "ceed-hip.h"

//------------------------------------------------------------------------------
Expand All @@ -33,22 +33,23 @@ static int CeedGetPreferredMemType_Hip(CeedMemType *type) {
//------------------------------------------------------------------------------
int CeedHipInit(Ceed ceed, const char *resource, int nrc) {
int ierr;
const int rlen = strlen(resource);
const bool slash = (rlen>nrc) ? (resource[nrc] == '/') : false;
const int deviceID = (slash && rlen > nrc + 1) ? atoi(&resource[nrc + 1]) : -1;
const char *device_spec = strstr(resource, ":device_id=");
const int deviceID = (device_spec) ? atoi(device_spec+11) : -1;

int currentDeviceID;
ierr = hipGetDevice(&currentDeviceID); CeedChk_Hip(ceed,ierr);
if (deviceID >= 0 && currentDeviceID != deviceID) {
ierr = hipSetDevice(deviceID); CeedChk_Hip(ceed,ierr);
currentDeviceID = deviceID;
}

struct hipDeviceProp_t deviceProp;
ierr = hipGetDeviceProperties(&deviceProp, deviceID); CeedChk_Hip(ceed,ierr);
ierr = hipGetDeviceProperties(&deviceProp, currentDeviceID);
CeedChk_Hip(ceed,ierr);

Ceed_Hip *data;
ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr);
data->deviceId = deviceID;
data->deviceId = currentDeviceID;
data->optblocksize = 256;
return CEED_ERROR_SUCCESS;
}
Expand Down Expand Up @@ -93,6 +94,7 @@ static int CeedInit_Hip(const char *resource, Ceed ceed) {
return CeedError(ceed, CEED_ERROR_BACKEND,
"Hip backend cannot use resource: %s", resource);
// LCOV_EXCL_STOP
ierr = CeedSetDeterministic(ceed, true); CeedChk(ierr);

Ceed_Hip *data;
ierr = CeedCalloc(1, &data); CeedChkBackend(ierr);
Expand Down
24 changes: 22 additions & 2 deletions backends/magma/ceed-magma-det.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,38 @@

#include <ceed/ceed.h>
#include <ceed/backend.h>
#include <string.h>
#include <stdlib.h>
#include "ceed-magma.h"

CEED_INTERN int CeedInit_Magma_Det(const char *resource, Ceed ceed) {
int ierr;
if (strcmp(resource, "/gpu/cuda/magma/det")
&& strcmp(resource, "/gpu/hip/magma/det"))
const int nrc = 18; // number of characters in resource
if (strncmp(resource, "/gpu/cuda/magma/det", nrc)
&& strncmp(resource, "/gpu/hip/magma/det", nrc))
// LCOV_EXCL_START
return CeedError(ceed, CEED_ERROR_BACKEND,
"Magma backend cannot use resource: %s", resource);
// LCOV_EXCL_STOP
ierr = CeedSetDeterministic(ceed, true); CeedChkBackend(ierr);

Ceed_Magma *data;
ierr = CeedCalloc(sizeof(Ceed_Magma), &data); CeedChkBackend(ierr);
ierr = CeedSetData(ceed, data); CeedChkBackend(ierr);

// get/set device ID
const char *device_spec = strstr(resource, ":device_id=");
const int deviceID = (device_spec) ? atoi(device_spec+11) : -1;

int currentDeviceID;
magma_getdevice(&currentDeviceID);
if (deviceID >= 0 && currentDeviceID != deviceID) {
magma_setdevice(deviceID);
currentDeviceID = deviceID;
}
// create a queue that uses the null stream
data->device = currentDeviceID;

// Create reference CEED that implementation will be dispatched
// through unless overridden
Ceed ceedref;
Expand Down
1 change: 1 addition & 0 deletions backends/magma/ceed-magma-restriction.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <ceed/ceed.h>
#include <ceed/backend.h>
#include <string.h>
#include "ceed-magma.h"

static int CeedElemRestrictionApply_Magma(CeedElemRestriction r,
Expand Down
37 changes: 25 additions & 12 deletions backends/magma/ceed-magma.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <ceed/ceed.h>
#include <ceed/backend.h>
#include <string.h>
#include <stdlib.h>
#include "ceed-magma.h"

static int CeedDestroy_Magma(Ceed ceed) {
Expand All @@ -30,22 +31,14 @@ static int CeedDestroy_Magma(Ceed ceed) {

static int CeedInit_Magma(const char *resource, Ceed ceed) {
int ierr;
if (strcmp(resource, "/gpu/cuda/magma") && strcmp(resource, "/gpu/hip/magma"))
const int nrc = 14; // number of characters in resource
if (strncmp(resource, "/gpu/cuda/magma", nrc)
&& strncmp(resource, "/gpu/hip/magma", nrc))
// LCOV_EXCL_START
return CeedError(ceed, CEED_ERROR_BACKEND,
"Magma backend cannot use resource: %s", resource);
// LCOV_EXCL_STOP

// Create reference CEED that implementation will be dispatched
// through unless overridden
Ceed ceedref;
#ifdef HAVE_HIP
CeedInit("/gpu/hip/ref", &ceedref);
#else
CeedInit("/gpu/cuda/ref", &ceedref);
#endif
ierr = CeedSetDelegate(ceed, ceedref); CeedChkBackend(ierr);

ierr = magma_init();
if (ierr)
// LCOV_EXCL_START
Expand All @@ -64,14 +57,34 @@ static int CeedInit_Magma(const char *resource, Ceed ceed) {
data->maxthreads[1] = 128; // for 2D kernels
data->maxthreads[2] = 64; // for 3D kernels

// get/set device ID
const char *device_spec = strstr(resource, ":device_id=");
const int deviceID = (device_spec) ? atoi(device_spec+11) : -1;

int currentDeviceID;
magma_getdevice(&currentDeviceID);
if (deviceID >= 0 && currentDeviceID != deviceID) {
magma_setdevice(deviceID);
currentDeviceID = deviceID;
}
// create a queue that uses the null stream
magma_getdevice( &(data->device) );
data->device = currentDeviceID;
#ifdef HAVE_HIP
magma_queue_create_from_hip(data->device, NULL, NULL, NULL, &(data->queue));
#else
magma_queue_create_from_cuda(data->device, NULL, NULL, NULL, &(data->queue));
#endif

// Create reference CEED that implementation will be dispatched
// through unless overridden
Ceed ceedref;
#ifdef HAVE_HIP
CeedInit("/gpu/hip/ref", &ceedref);
#else
CeedInit("/gpu/cuda/ref", &ceedref);
#endif
ierr = CeedSetDelegate(ceed, ceedref); CeedChkBackend(ierr);

ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "ElemRestrictionCreate",
CeedElemRestrictionCreate_Magma); CeedChkBackend(ierr);
ierr = CeedSetBackendFunction(ceed, "Ceed", ceed,
Expand Down

0 comments on commit ebc204c

Please sign in to comment.