From 3569eb2b9ff140271b4f8fd0caab6fbe5d7af579 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 10 Nov 2021 16:58:28 -0500 Subject: [PATCH 01/11] Initial work on HIP support. --- Makefile | 7 +-- contrib/cuda_hip_wrapper.h | 75 +++++++++++++++++++++++++++++++++ contrib/utils.h | 2 +- contrib/utils_fp.h | 2 +- include/cufinufft_eitherprec.h | 3 +- include/cufinufft_errors.h | 2 + sites/make.inc.olcf_spock | 7 +++ src/2d/cufinufft2d.cu | 2 +- src/2d/interp2d_wrapper.cu | 2 +- src/2d/spread2d_wrapper.cu | 2 +- src/2d/spread2d_wrapper_paul.cu | 2 +- src/2d/spreadinterp2d.cu | 2 +- src/3d/cufinufft3d.cu | 2 +- src/3d/interp3d_wrapper.cu | 2 +- src/3d/spread3d_wrapper.cu | 2 +- src/3d/spreadinterp3d.cu | 2 +- src/cufinufft.cu | 2 +- src/deconvolve_wrapper.cu | 3 +- src/memtransfer_wrapper.cu | 2 +- src/profile.cu | 8 ++++ 20 files changed, 111 insertions(+), 20 deletions(-) create mode 100644 contrib/cuda_hip_wrapper.h create mode 100644 sites/make.inc.olcf_spock diff --git a/Makefile b/Makefile index 5a966ad1..4ddc3ac3 100644 --- a/Makefile +++ b/Makefile @@ -35,6 +35,7 @@ CFLAGS ?= -fPIC -O3 -funroll-loops -march=native CXXFLAGS ?= $(CFLAGS) -std=c++14 NVCCFLAGS ?= -std=c++14 -ccbin=$(CXX) -O3 $(NVARCH) -Wno-deprecated-gpu-targets \ --default-stream per-thread -Xcompiler "$(CXXFLAGS)" +DEVFLAGS ?= --device-c # For debugging, tell nvcc to add symbols to host and device code respectively, #NVCCFLAGS+= -g -G @@ -51,7 +52,7 @@ else endif # Common includes -INC += -I$(CUDA_ROOT)/include -Icontrib/cuda_samples +INC += -Icontrib/cuda_samples # NVCC-specific libs NVCC_LIBS_PATH += -L$(CUDA_ROOT)/lib64 @@ -104,13 +105,13 @@ CUFINUFFTOBJS_32=$(CUFINUFFTOBJS_64:%.o=%_32.o) %_32.o: %.c $(HEADERS) $(CC) -DSINGLE -c $(CFLAGS) $(INC) $< -o $@ %_32.o: %.cu $(HEADERS) - $(NVCC) -DSINGLE --device-c -c $(NVCCFLAGS) $(INC) $< -o $@ + $(NVCC) -DSINGLE $(DEVFLAGS) -c $(NVCCFLAGS) $(INC) $< -o $@ %.o: %.cpp $(HEADERS) $(CXX) -c $(CXXFLAGS) $(INC) $< -o $@ %.o: %.c $(HEADERS) $(CC) -c $(CFLAGS) $(INC) $< -o $@ %.o: %.cu $(HEADERS) - $(NVCC) --device-c -c $(NVCCFLAGS) $(INC) $< -o $@ + $(NVCC) $(DEVFLAGS) -c $(NVCCFLAGS) $(INC) $< -o $@ default: all diff --git a/contrib/cuda_hip_wrapper.h b/contrib/cuda_hip_wrapper.h new file mode 100644 index 00000000..92b02245 --- /dev/null +++ b/contrib/cuda_hip_wrapper.h @@ -0,0 +1,75 @@ +#ifndef CUDA_HIP_WRAPPER_H +#define CUDA_HIP_WRAPPER_H + +#ifdef USE_HIP + +#include +#include +#include + +// cuda.h adapters +#define cudaDeviceSynchronize hipDeviceSynchronize +#define cudaError_t hipError_t +#define cudaEventCreate hipEventCreate +#define cudaEventRecord hipEventRecord +#define cudaEvent_t hipEvent_t +#define cudaFree hipFree +#define cudaGetDevice hipGetDevice +#define cudaGetErrorName hipGetErrorName +#define cudaGetErrorString hipGetErrorString +#define cudaGetLastError hipGetLastError +#define cudaMalloc hipMalloc +#define cudaMemcpy hipMemcpy +#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost +#define cudaMemcpyHostToDevice hipMemcpyHostToDevice +#define cudaMemset hipMemset +#define cudaSetDevice hipSetDevice +#define cudaStreamCreate hipStreamCreate +#define cudaStreamDestroy hipStreamDestroy +#define cudaStream_t hipStream_t +#define cudaSuccess hipSuccess + +// cuComplex.h adapters +#define cuDoubleComplex hipDoubleComplex +#define cuFloatComplex hipFloatComplex + +// cufft.h adapters +#define CUFFT_ALLOC_FAILED HIPFFT_ALLOC_FAILED +#define CUFFT_C2C HIPFFT_C2C +#define CUFFT_EXEC_FAILED HIPFFT_EXEC_FAILED +#define CUFFT_INCOMPLETE_PARAMETER_LIST HIPFFT_INCOMPLETE_PARAMETER_LIST +#define CUFFT_INTERNAL_ERROR HIPFFT_INTERNAL_ERROR +#define CUFFT_INVALID_DEVICE HIPFFT_INVALID_DEVICE +#define CUFFT_INVALID_PLAN HIPFFT_INVALID_PLAN +#define CUFFT_INVALID_SIZE HIPFFT_INVALID_SIZE +#define CUFFT_INVALID_TYPE HIPFFT_INVALID_TYPE +#define CUFFT_INVALID_TYPE HIPFFT_INVALID_TYPE +#define CUFFT_INVALID_VALUE HIPFFT_INVALID_VALUE +#define CUFFT_NOT_IMPLEMENTED HIPFFT_NOT_IMPLEMENTED +#define CUFFT_NOT_SUPPORTED HIPFFT_NOT_SUPPORTED +#define CUFFT_NO_WORKSPACE HIPFFT_NO_WORKSPACE +#define CUFFT_PARSE_ERROR HIPFFT_PARSE_ERROR +#define CUFFT_SETUP_FAILED HIPFFT_SETUP_FAILED +#define CUFFT_SUCCESS HIPFFT_SUCCESS +#define CUFFT_UNALIGNED_DATA HIPFFT_UNALIGNED_DATA +#define CUFFT_Z2Z HIPFFT_Z2Z +#define cufftDestroy hipfftDestroy +#define cufftExecC2C hipfftExecC2C +#define cufftExecZ2Z hipfftExecZ2Z +#define cufftHandle hipfftHandle +#define cufftPlanMany hipfftPlanMany +#define cufftResult_t hipfftResult_t + +// helper_cuda.h adapters +#define __DRIVER_TYPES_H__ + +#else + +#include +#include +#include +#include + +#endif + +#endif diff --git a/contrib/utils.h b/contrib/utils.h index ac5c5338..05f06336 100644 --- a/contrib/utils.h +++ b/contrib/utils.h @@ -8,7 +8,7 @@ #include #include // C++ type complex -#include +#include "cuda_hip_wrapper.h" #include "dataTypes.h" // fraction growth cut-off in arraywidcen(), to decide if translate in type-3 diff --git a/contrib/utils_fp.h b/contrib/utils_fp.h index 7c15969f..d3faaaa8 100644 --- a/contrib/utils_fp.h +++ b/contrib/utils_fp.h @@ -15,7 +15,7 @@ #include #include // C++ type complex -#include +#include "cuda_hip_wrapper.h" #include "dataTypes.h" diff --git a/include/cufinufft_eitherprec.h b/include/cufinufft_eitherprec.h index 89fca0cd..d05c9758 100644 --- a/include/cufinufft_eitherprec.h +++ b/include/cufinufft_eitherprec.h @@ -7,9 +7,8 @@ // Make sure we don't include double or single headers more than once... #include -#include +#include "cuda_hip_wrapper.h" #include -#include #include "cufinufft_opts.h" #include "../src/precision_independent.h" #include "cufinufft_errors.h" diff --git a/include/cufinufft_errors.h b/include/cufinufft_errors.h index 18ef7135..a5792b3b 100644 --- a/include/cufinufft_errors.h +++ b/include/cufinufft_errors.h @@ -36,8 +36,10 @@ static const char* _cufftGetErrorEnum(cufftResult_t error) return "cufft_no_workspace"; case CUFFT_NOT_IMPLEMENTED: return "cufft_not_implemented"; +#ifndef USE_HIP case CUFFT_LICENSE_ERROR: return "cufft_license_error"; +#endif case CUFFT_NOT_SUPPORTED: return "cufft_not_supported"; } diff --git a/sites/make.inc.olcf_spock b/sites/make.inc.olcf_spock new file mode 100644 index 00000000..05461b1e --- /dev/null +++ b/sites/make.inc.olcf_spock @@ -0,0 +1,7 @@ +# CC := hipcc +CXX := hipcc +NVCC := hipcc +NVARCH := --amdgpu-target=gfx908 +CXXFLAGS += -DUSE_HIP -Icontrib -I$(ROCM_PATH)/hipfft/include +NVCCFLAGS := -std=c++14 -O3 $(NVARCH) $(CXXFLAGS) +DEVFLAGS := -fPIC diff --git a/src/2d/cufinufft2d.cu b/src/2d/cufinufft2d.cu index 2eb6e3a3..369436ed 100644 --- a/src/2d/cufinufft2d.cu +++ b/src/2d/cufinufft2d.cu @@ -1,9 +1,9 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include -#include #include #include "../cuspreadinterp.h" diff --git a/src/2d/interp2d_wrapper.cu b/src/2d/interp2d_wrapper.cu index c8c649d8..ce2f154c 100644 --- a/src/2d/interp2d_wrapper.cu +++ b/src/2d/interp2d_wrapper.cu @@ -1,8 +1,8 @@ +#include "cuda_hip_wrapper.h" #include #include #include -#include #include "../cuspreadinterp.h" #include "../memtransfer.h" #include diff --git a/src/2d/spread2d_wrapper.cu b/src/2d/spread2d_wrapper.cu index da5d1de3..9d3ad811 100644 --- a/src/2d/spread2d_wrapper.cu +++ b/src/2d/spread2d_wrapper.cu @@ -1,3 +1,4 @@ +#include "cuda_hip_wrapper.h" #include #include #include @@ -6,7 +7,6 @@ #include #include -#include #include "../cuspreadinterp.h" #include "../memtransfer.h" diff --git a/src/2d/spread2d_wrapper_paul.cu b/src/2d/spread2d_wrapper_paul.cu index 5dbd4f1e..067ea569 100644 --- a/src/2d/spread2d_wrapper_paul.cu +++ b/src/2d/spread2d_wrapper_paul.cu @@ -1,3 +1,4 @@ +#include "cuda_hip_wrapper.h" #include #include #include @@ -6,7 +7,6 @@ #include #include -#include #include "../cuspreadinterp.h" #include "../memtransfer.h" diff --git a/src/2d/spreadinterp2d.cu b/src/2d/spreadinterp2d.cu index bbe3c6bd..ca1744ce 100644 --- a/src/2d/spreadinterp2d.cu +++ b/src/2d/spreadinterp2d.cu @@ -1,7 +1,7 @@ #include #include #include -#include +#include "cuda_hip_wrapper.h" #include #include "../../contrib/utils.h" #include "../../contrib/utils_fp.h" diff --git a/src/3d/cufinufft3d.cu b/src/3d/cufinufft3d.cu index 6f2fa76a..4988a5db 100644 --- a/src/3d/cufinufft3d.cu +++ b/src/3d/cufinufft3d.cu @@ -1,9 +1,9 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include -#include #include #include "../cuspreadinterp.h" diff --git a/src/3d/interp3d_wrapper.cu b/src/3d/interp3d_wrapper.cu index 8a93425a..a0f73b80 100644 --- a/src/3d/interp3d_wrapper.cu +++ b/src/3d/interp3d_wrapper.cu @@ -1,8 +1,8 @@ +#include "cuda_hip_wrapper.h" #include #include #include -#include #include "../cuspreadinterp.h" #include "../memtransfer.h" #include diff --git a/src/3d/spread3d_wrapper.cu b/src/3d/spread3d_wrapper.cu index 2810c8e1..41ddc70c 100644 --- a/src/3d/spread3d_wrapper.cu +++ b/src/3d/spread3d_wrapper.cu @@ -1,3 +1,4 @@ +#include "cuda_hip_wrapper.h" #include #include #include @@ -6,7 +7,6 @@ #include #include -#include #include "../cuspreadinterp.h" #include "../memtransfer.h" #include "../precision_independent.h" diff --git a/src/3d/spreadinterp3d.cu b/src/3d/spreadinterp3d.cu index 73c1386f..b3229861 100644 --- a/src/3d/spreadinterp3d.cu +++ b/src/3d/spreadinterp3d.cu @@ -1,7 +1,7 @@ #include #include #include -#include +#include "cuda_hip_wrapper.h" #include "../../contrib/utils.h" #include "../../contrib/utils_fp.h" #include "../cuspreadinterp.h" diff --git a/src/cufinufft.cu b/src/cufinufft.cu index 2b436da7..144503e4 100644 --- a/src/cufinufft.cu +++ b/src/cufinufft.cu @@ -1,9 +1,9 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include -#include #include #include "cuspreadinterp.h" diff --git a/src/deconvolve_wrapper.cu b/src/deconvolve_wrapper.cu index d5cf6c6c..f3c93ba2 100644 --- a/src/deconvolve_wrapper.cu +++ b/src/deconvolve_wrapper.cu @@ -1,9 +1,8 @@ -#include +#include "cuda_hip_wrapper.h" #include #include #include -#include #include "cudeconvolve.h" using namespace std; diff --git a/src/memtransfer_wrapper.cu b/src/memtransfer_wrapper.cu index 7a0c68f6..0743e921 100644 --- a/src/memtransfer_wrapper.cu +++ b/src/memtransfer_wrapper.cu @@ -1,8 +1,8 @@ +#include "cuda_hip_wrapper.h" #include #include #include -#include #include "memtransfer.h" using namespace std; diff --git a/src/profile.cu b/src/profile.cu index 9bd40f34..998a699e 100644 --- a/src/profile.cu +++ b/src/profile.cu @@ -1,5 +1,7 @@ #include +#ifndef USE_HIP #include +#endif #include @@ -7,6 +9,7 @@ const uint32_t colors[] = { 0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff }; const int num_colors = sizeof(colors)/sizeof(uint32_t); +#ifndef USE_HIP #define PUSH_RANGE(name,cid) { \ int color_id = cid; \ color_id = color_id%num_colors;\ @@ -20,13 +23,18 @@ const int num_colors = sizeof(colors)/sizeof(uint32_t); nvtxRangePushEx(&eventAttrib); \ } #define POP_RANGE nvtxRangePop(); +#endif CudaTracer::CudaTracer(const char* name, int cid) { +#ifndef USE_HIP PUSH_RANGE(name,cid); +#endif } CudaTracer::~CudaTracer() { +#ifndef USE_HIP POP_RANGE; +#endif } From f768ea8ef19e3db7a460ebec9bafdfdf6ae3ae4e Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 11 Nov 2021 12:53:10 -0500 Subject: [PATCH 02/11] Fix linking in HIP. --- Makefile | 2 +- python/cufinufft/requirements.txt | 1 - sites/make.inc.olcf_spock | 5 +++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 4ddc3ac3..1d7a770b 100644 --- a/Makefile +++ b/Makefile @@ -64,7 +64,7 @@ ifdef NVCC_STUBS NVCC_LIBS_PATH += -L$(NVCC_STUBS) endif -LIBS += -lm -lcudart -lstdc++ -lnvToolsExt -lcufft -lcuda +LIBS ?= -lm -lcudart -lstdc++ -lnvToolsExt -lcufft -lcuda ############################################################# diff --git a/python/cufinufft/requirements.txt b/python/cufinufft/requirements.txt index aaa51147..223a9a4e 100644 --- a/python/cufinufft/requirements.txt +++ b/python/cufinufft/requirements.txt @@ -1,3 +1,2 @@ numpy<1.22 -pycuda six diff --git a/sites/make.inc.olcf_spock b/sites/make.inc.olcf_spock index 05461b1e..31c4e1dc 100644 --- a/sites/make.inc.olcf_spock +++ b/sites/make.inc.olcf_spock @@ -2,6 +2,7 @@ CXX := hipcc NVCC := hipcc NVARCH := --amdgpu-target=gfx908 -CXXFLAGS += -DUSE_HIP -Icontrib -I$(ROCM_PATH)/hipfft/include +CXXFLAGS += -fPIC -DUSE_HIP -Icontrib -I$(ROCM_PATH)/hipfft/include NVCCFLAGS := -std=c++14 -O3 $(NVARCH) $(CXXFLAGS) -DEVFLAGS := -fPIC +DEVFLAGS := -fgpu-rdc +LIBS := -fgpu-rdc -L$(ROCM_PATH)/hipfft/lib -lhipfft From eda340df38db8b7f3c102deddc103274f5b6d6dd Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 11 Nov 2021 13:27:58 -0500 Subject: [PATCH 03/11] Fix some tests. --- contrib/cuda_hip_wrapper.h | 6 ++++++ test/cufinufft2d1_test.cu | 1 + test/cufinufft2d1many_test.cu | 1 + test/cufinufft2d1nupts_test.cu | 1 + test/cufinufft2d2_test.cu | 1 + test/cufinufft2d2api_test.cu | 1 + test/cufinufft2d2api_test_32.cu | 1 + test/cufinufft2d2many_test.cu | 1 + test/cufinufft3d1_test.cu | 1 + test/cufinufft3d2_test.cu | 1 + test/interp2d_test.cu | 1 + test/interp3d_test.cu | 1 + test/spread2d_test.cu | 1 + test/spread3d_test.cu | 1 + 14 files changed, 19 insertions(+) diff --git a/contrib/cuda_hip_wrapper.h b/contrib/cuda_hip_wrapper.h index 92b02245..45a373a0 100644 --- a/contrib/cuda_hip_wrapper.h +++ b/contrib/cuda_hip_wrapper.h @@ -8,17 +8,22 @@ #include // cuda.h adapters +#define cudaDeviceReset hipDeviceReset #define cudaDeviceSynchronize hipDeviceSynchronize #define cudaError_t hipError_t #define cudaEventCreate hipEventCreate +#define cudaEventElapsedTime hipEventElapsedTime #define cudaEventRecord hipEventRecord +#define cudaEventSynchronize hipEventSynchronize #define cudaEvent_t hipEvent_t #define cudaFree hipFree +#define cudaFreeHost hipHostFree // hipFreeHost is deprecated #define cudaGetDevice hipGetDevice #define cudaGetErrorName hipGetErrorName #define cudaGetErrorString hipGetErrorString #define cudaGetLastError hipGetLastError #define cudaMalloc hipMalloc +#define cudaMallocHost hipHostMalloc // hipMallocHost is deprecated #define cudaMemcpy hipMemcpy #define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost #define cudaMemcpyHostToDevice hipMemcpyHostToDevice @@ -57,6 +62,7 @@ #define cufftExecC2C hipfftExecC2C #define cufftExecZ2Z hipfftExecZ2Z #define cufftHandle hipfftHandle +#define cufftPlan1d hipfftPlan1d #define cufftPlanMany hipfftPlanMany #define cufftResult_t hipfftResult_t diff --git a/test/cufinufft2d1_test.cu b/test/cufinufft2d1_test.cu index 54c45fe4..1f003150 100644 --- a/test/cufinufft2d1_test.cu +++ b/test/cufinufft2d1_test.cu @@ -1,6 +1,7 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include diff --git a/test/cufinufft2d1many_test.cu b/test/cufinufft2d1many_test.cu index 6a3c73ee..92aa3c0e 100644 --- a/test/cufinufft2d1many_test.cu +++ b/test/cufinufft2d1many_test.cu @@ -1,6 +1,7 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include diff --git a/test/cufinufft2d1nupts_test.cu b/test/cufinufft2d1nupts_test.cu index 755fe7e4..0e62283b 100644 --- a/test/cufinufft2d1nupts_test.cu +++ b/test/cufinufft2d1nupts_test.cu @@ -1,6 +1,7 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include diff --git a/test/cufinufft2d2_test.cu b/test/cufinufft2d2_test.cu index f51f736b..c53d385b 100644 --- a/test/cufinufft2d2_test.cu +++ b/test/cufinufft2d2_test.cu @@ -1,6 +1,7 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include diff --git a/test/cufinufft2d2api_test.cu b/test/cufinufft2d2api_test.cu index 6ffecf2c..d031ab32 100644 --- a/test/cufinufft2d2api_test.cu +++ b/test/cufinufft2d2api_test.cu @@ -4,6 +4,7 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include diff --git a/test/cufinufft2d2api_test_32.cu b/test/cufinufft2d2api_test_32.cu index a0e19ccb..b940de90 100644 --- a/test/cufinufft2d2api_test_32.cu +++ b/test/cufinufft2d2api_test_32.cu @@ -10,6 +10,7 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include diff --git a/test/cufinufft2d2many_test.cu b/test/cufinufft2d2many_test.cu index ab063406..a49ca370 100644 --- a/test/cufinufft2d2many_test.cu +++ b/test/cufinufft2d2many_test.cu @@ -1,6 +1,7 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include diff --git a/test/cufinufft3d1_test.cu b/test/cufinufft3d1_test.cu index 1d7527ab..a127adcb 100644 --- a/test/cufinufft3d1_test.cu +++ b/test/cufinufft3d1_test.cu @@ -1,6 +1,7 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include diff --git a/test/cufinufft3d2_test.cu b/test/cufinufft3d2_test.cu index 37f4be5a..c6535b79 100644 --- a/test/cufinufft3d2_test.cu +++ b/test/cufinufft3d2_test.cu @@ -1,6 +1,7 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include #include diff --git a/test/interp2d_test.cu b/test/interp2d_test.cu index 3190dfa6..3b3bd8b5 100644 --- a/test/interp2d_test.cu +++ b/test/interp2d_test.cu @@ -1,6 +1,7 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include #include "../src/cuspreadinterp.h" diff --git a/test/interp3d_test.cu b/test/interp3d_test.cu index b96143df..13c0c39d 100644 --- a/test/interp3d_test.cu +++ b/test/interp3d_test.cu @@ -1,6 +1,7 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include #include "../src/cuspreadinterp.h" diff --git a/test/spread2d_test.cu b/test/spread2d_test.cu index aa70c8e3..1840563f 100644 --- a/test/spread2d_test.cu +++ b/test/spread2d_test.cu @@ -1,6 +1,7 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include #include diff --git a/test/spread3d_test.cu b/test/spread3d_test.cu index 853f4b44..6a3efa5b 100644 --- a/test/spread3d_test.cu +++ b/test/spread3d_test.cu @@ -1,6 +1,7 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include #include "../src/cuspreadinterp.h" From 2fefe3d63bdd6da46de95ab56b31c1b597687ff0 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 12 Nov 2021 16:20:10 -0500 Subject: [PATCH 04/11] Fix CUDA include path and HIP linkage. --- Makefile | 8 +++++--- sites/make.inc.olcf_spock | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 1d7a770b..c02b471d 100644 --- a/Makefile +++ b/Makefile @@ -52,6 +52,7 @@ else endif # Common includes +INC ?= -I$(CUDA_ROOT)/include INC += -Icontrib/cuda_samples # NVCC-specific libs @@ -74,7 +75,8 @@ LIBS ?= -lm -lcudart -lstdc++ -lnvToolsExt -lcufft -lcuda # Include header files INC += -I include -LIBNAME=libcufinufft +LIBNAME_SHORT=cufinufft +LIBNAME=lib$(LIBNAME_SHORT) DYNAMICLIB=lib/$(LIBNAME).so STATICLIB=lib-static/$(LIBNAME).a @@ -159,11 +161,11 @@ examples: $(BINDIR)/example2d1many \ $(BINDIR)/example%: examples/example%.cpp $(DYNAMICLIB) $(HEADERS) mkdir -p $(BINDIR) - $(NVCC) $(NVCCFLAGS) $(INC) $(LIBS) -o $@ $< $(DYNAMICLIB) + $(NVCC) $(NVCCFLAGS) $(INC) $(LIBS) -o $@ $< -Llib -l$(LIBNAME_SHORT) $(BINDIR)/cufinufft2d2api_test%: test/cufinufft2d2api_test%.o $(DYNAMICLIB) mkdir -p $(BINDIR) - $(NVCC) $(NVCCFLAGS) $(LIBS) -o $@ $< $(DYNAMICLIB) + $(NVCC) $(NVCCFLAGS) $(LIBS) -o $@ $< -Llib -l$(LIBNAME_SHORT) $(BINDIR)/%_32: test/%_32.o $(CUFINUFFTOBJS_32) $(CUFINUFFTOBJS) mkdir -p $(BINDIR) diff --git a/sites/make.inc.olcf_spock b/sites/make.inc.olcf_spock index 31c4e1dc..a8f94c1d 100644 --- a/sites/make.inc.olcf_spock +++ b/sites/make.inc.olcf_spock @@ -5,4 +5,5 @@ NVARCH := --amdgpu-target=gfx908 CXXFLAGS += -fPIC -DUSE_HIP -Icontrib -I$(ROCM_PATH)/hipfft/include NVCCFLAGS := -std=c++14 -O3 $(NVARCH) $(CXXFLAGS) DEVFLAGS := -fgpu-rdc +INC := LIBS := -fgpu-rdc -L$(ROCM_PATH)/hipfft/lib -lhipfft From d119a0c840369d3be01ce65280e3e4b5ca3294f0 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 12 Nov 2021 16:23:21 -0500 Subject: [PATCH 05/11] More fixing paths. --- Makefile | 2 +- sites/make.inc.olcf_spock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index c02b471d..090192da 100644 --- a/Makefile +++ b/Makefile @@ -53,7 +53,7 @@ endif # Common includes INC ?= -I$(CUDA_ROOT)/include -INC += -Icontrib/cuda_samples +INC += -Icontrib -Icontrib/cuda_samples # NVCC-specific libs NVCC_LIBS_PATH += -L$(CUDA_ROOT)/lib64 diff --git a/sites/make.inc.olcf_spock b/sites/make.inc.olcf_spock index a8f94c1d..703ba4a9 100644 --- a/sites/make.inc.olcf_spock +++ b/sites/make.inc.olcf_spock @@ -2,7 +2,7 @@ CXX := hipcc NVCC := hipcc NVARCH := --amdgpu-target=gfx908 -CXXFLAGS += -fPIC -DUSE_HIP -Icontrib -I$(ROCM_PATH)/hipfft/include +CXXFLAGS += -fPIC -DUSE_HIP -I$(ROCM_PATH)/hipfft/include NVCCFLAGS := -std=c++14 -O3 $(NVARCH) $(CXXFLAGS) DEVFLAGS := -fgpu-rdc INC := From 9c4f615e5243350a11a261726a116296248eaca3 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 28 Jan 2022 16:18:23 -0500 Subject: [PATCH 06/11] Site file for Crusher. --- sites/make.inc.olcf_crusher | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 sites/make.inc.olcf_crusher diff --git a/sites/make.inc.olcf_crusher b/sites/make.inc.olcf_crusher new file mode 100644 index 00000000..9e0c4f30 --- /dev/null +++ b/sites/make.inc.olcf_crusher @@ -0,0 +1,9 @@ +# CC := hipcc +CXX := hipcc +NVCC := hipcc +NVARCH := --amdgpu-target=gfx90a +CXXFLAGS += -fPIC -DUSE_HIP -I$(ROCM_PATH)/hipfft/include +NVCCFLAGS := -std=c++14 -O3 $(NVARCH) $(CXXFLAGS) +DEVFLAGS := -fgpu-rdc +INC := +LIBS := -fgpu-rdc -L$(ROCM_PATH)/hipfft/lib -lhipfft From e1e9c8f164809c131d8803c66d0c58e01793bab4 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 28 Jan 2022 19:41:36 -0500 Subject: [PATCH 07/11] Port 1d files and tests to HIP. --- src/1d/cufinufft1d.cu | 2 +- src/1d/interp1d_wrapper.cu | 3 +-- src/1d/spread1d_wrapper.cu | 2 +- src/1d/spreadinterp1d.cu | 2 +- test/cufinufft1d1_test.cu | 1 + test/cufinufft1d2_test.cu | 1 + test/interp1d_test.cu | 1 + test/spread1d_test.cu | 1 + 8 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/1d/cufinufft1d.cu b/src/1d/cufinufft1d.cu index 8b788a14..d29abe0b 100644 --- a/src/1d/cufinufft1d.cu +++ b/src/1d/cufinufft1d.cu @@ -1,9 +1,9 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include -#include #include #include "../cuspreadinterp.h" diff --git a/src/1d/interp1d_wrapper.cu b/src/1d/interp1d_wrapper.cu index bd528ca2..be2bd4a7 100644 --- a/src/1d/interp1d_wrapper.cu +++ b/src/1d/interp1d_wrapper.cu @@ -1,8 +1,7 @@ -#include +#include "cuda_hip_wrapper.h" #include #include -#include #include "../cuspreadinterp.h" #include "../memtransfer.h" #include diff --git a/src/1d/spread1d_wrapper.cu b/src/1d/spread1d_wrapper.cu index 1d31c590..bcb53ea1 100644 --- a/src/1d/spread1d_wrapper.cu +++ b/src/1d/spread1d_wrapper.cu @@ -1,3 +1,4 @@ +#include "cuda_hip_wrapper.h" #include #include #include @@ -6,7 +7,6 @@ #include #include -#include #include "../cuspreadinterp.h" #include "../memtransfer.h" diff --git a/src/1d/spreadinterp1d.cu b/src/1d/spreadinterp1d.cu index d5494d8f..ffbf95f2 100644 --- a/src/1d/spreadinterp1d.cu +++ b/src/1d/spreadinterp1d.cu @@ -1,7 +1,7 @@ #include #include +#include "cuda_hip_wrapper.h" #include -#include #include #include "../../contrib/utils.h" #include "../../contrib/utils_fp.h" diff --git a/test/cufinufft1d1_test.cu b/test/cufinufft1d1_test.cu index 94ec57df..00d07192 100644 --- a/test/cufinufft1d1_test.cu +++ b/test/cufinufft1d1_test.cu @@ -1,6 +1,7 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include diff --git a/test/cufinufft1d2_test.cu b/test/cufinufft1d2_test.cu index 2f6390ef..f9944ea6 100644 --- a/test/cufinufft1d2_test.cu +++ b/test/cufinufft1d2_test.cu @@ -1,6 +1,7 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include diff --git a/test/interp1d_test.cu b/test/interp1d_test.cu index 095d5311..faf5f21c 100644 --- a/test/interp1d_test.cu +++ b/test/interp1d_test.cu @@ -1,6 +1,7 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include #include "../src/cuspreadinterp.h" diff --git a/test/spread1d_test.cu b/test/spread1d_test.cu index f45944e4..e8b0820e 100644 --- a/test/spread1d_test.cu +++ b/test/spread1d_test.cu @@ -1,6 +1,7 @@ #include #include #include +#include "cuda_hip_wrapper.h" #include #include #include From e3b75f6257a774975ee00b0bed92948b625e45c6 Mon Sep 17 00:00:00 2001 From: darrenjhsu Date: Fri, 17 Jun 2022 15:59:10 -0400 Subject: [PATCH 08/11] Commented out tests that fail on Crusher ROCm/4.5.0 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 090192da..2e3c8791 100644 --- a/Makefile +++ b/Makefile @@ -240,11 +240,11 @@ check2D_64: spreadtest libtest bin/cufinufft2d1many_test 2 512 512 256 bin/cufinufft2d1many_test 1 1e2 2e2 3e2 16 1e4 bin/cufinufft2d1many_test 2 1e2 2e2 3e2 16 1e4 - bin/cufinufft2d2many_test 1 64 64 128 1e-3 + #bin/cufinufft2d2many_test 1 64 64 128 1e-3 # This test fails on Crusher ROCm/4.5.0 bin/cufinufft2d2many_test 1 256 256 1024 bin/cufinufft2d2many_test 2 512 512 256 bin/cufinufft2d2many_test 1 256 256 1024 - bin/cufinufft2d2many_test 1 1e2 2e2 3e2 16 1e4 + #bin/cufinufft2d2many_test 1 1e2 2e2 3e2 16 1e4 # This test fails on Crusher ROCm/4.5.0 bin/cufinufft2d2many_test 2 1e2 2e2 3e2 16 1e4 check2D_32: spreadtest libtest From 70edb3a25a9779186a0659fe56c023017f226001 Mon Sep 17 00:00:00 2001 From: darrenjhsu Date: Fri, 17 Jun 2022 15:59:59 -0400 Subject: [PATCH 09/11] Change PyCUDA dependency to PyBindGPU --- examples/example2d1many.py | 7 +++++-- examples/example2d2many.py | 7 +++++-- python/cufinufft/tests/test_basic.py | 6 ++++-- python/cufinufft/tests/test_error_checks.py | 5 +++-- python/cufinufft/tests/test_multi.py | 5 +++-- 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/examples/example2d1many.py b/examples/example2d1many.py index 0fad05bf..8ba054a9 100644 --- a/examples/example2d1many.py +++ b/examples/example2d1many.py @@ -4,8 +4,11 @@ import numpy as np -import pycuda.autoinit -from pycuda.gpuarray import GPUArray, to_gpu +#import PyGPU as gpuarray +from PyGPU import GPUArray, to_gpu + +#import pycuda.autoinit +#from pycuda.gpuarray import GPUArray, to_gpu from cufinufft import cufinufft diff --git a/examples/example2d2many.py b/examples/example2d2many.py index 95d8d212..ba98ba89 100644 --- a/examples/example2d2many.py +++ b/examples/example2d2many.py @@ -4,8 +4,11 @@ import numpy as np -import pycuda.autoinit -from pycuda.gpuarray import GPUArray, to_gpu +import PyGPU as gpuarray +from PyGPU.gpuarray import GPUArray, to_gpu + +#import pycuda.autoinit +#from pycuda.gpuarray import GPUArray, to_gpu from cufinufft import cufinufft diff --git a/python/cufinufft/tests/test_basic.py b/python/cufinufft/tests/test_basic.py index bc8257e8..aff8997f 100644 --- a/python/cufinufft/tests/test_basic.py +++ b/python/cufinufft/tests/test_basic.py @@ -1,7 +1,9 @@ import numpy as np -import pycuda.autoinit # NOQA:401 -import pycuda.gpuarray as gpuarray +import PyGPU as gpuarray + +#import pycuda.autoinit # NOQA:401 +#import pycuda.gpuarray as gpuarray from cufinufft import cufinufft diff --git a/python/cufinufft/tests/test_error_checks.py b/python/cufinufft/tests/test_error_checks.py index 89e4c9d4..62822721 100644 --- a/python/cufinufft/tests/test_error_checks.py +++ b/python/cufinufft/tests/test_error_checks.py @@ -1,8 +1,9 @@ import numpy as np import pytest -import pycuda.autoinit # NOQA:401 -import pycuda.gpuarray as gpuarray +import PyGPU as gpuarray +#import pycuda.autoinit # NOQA:401 +#import pycuda.gpuarray as gpuarray from cufinufft import cufinufft diff --git a/python/cufinufft/tests/test_multi.py b/python/cufinufft/tests/test_multi.py index dbb86cbf..a7a51033 100644 --- a/python/cufinufft/tests/test_multi.py +++ b/python/cufinufft/tests/test_multi.py @@ -2,8 +2,9 @@ import numpy as np -import pycuda.driver as drv -import pycuda.gpuarray as gpuarray +import PyGPU as gpuarray +#import pycuda.driver as drv +#import pycuda.gpuarray as gpuarray from cufinufft import cufinufft From b8228decd8698dde3a496c761df876fb50774505 Mon Sep 17 00:00:00 2001 From: darrenjhsu Date: Tue, 20 Dec 2022 14:59:49 -0500 Subject: [PATCH 10/11] Make compatible with new PybindGPU --- Makefile | 4 ++-- examples/example2d1many.py | 4 ++-- examples/example2d2many.py | 5 ++--- python/cufinufft/tests/test_basic.py | 4 ++-- python/cufinufft/tests/test_error_checks.py | 2 +- python/cufinufft/tests/test_multi.py | 16 ++++++++++------ 6 files changed, 19 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 2e3c8791..64395888 100644 --- a/Makefile +++ b/Makefile @@ -240,11 +240,11 @@ check2D_64: spreadtest libtest bin/cufinufft2d1many_test 2 512 512 256 bin/cufinufft2d1many_test 1 1e2 2e2 3e2 16 1e4 bin/cufinufft2d1many_test 2 1e2 2e2 3e2 16 1e4 - #bin/cufinufft2d2many_test 1 64 64 128 1e-3 # This test fails on Crusher ROCm/4.5.0 + bin/cufinufft2d2many_test 1 64 64 128 1e-3 # This test fails on Crusher ROCm/4.5.0 bin/cufinufft2d2many_test 1 256 256 1024 bin/cufinufft2d2many_test 2 512 512 256 bin/cufinufft2d2many_test 1 256 256 1024 - #bin/cufinufft2d2many_test 1 1e2 2e2 3e2 16 1e4 # This test fails on Crusher ROCm/4.5.0 + bin/cufinufft2d2many_test 1 1e2 2e2 3e2 16 1e4 # This test fails on Crusher ROCm/4.5.0 bin/cufinufft2d2many_test 2 1e2 2e2 3e2 16 1e4 check2D_32: spreadtest libtest diff --git a/examples/example2d1many.py b/examples/example2d1many.py index 8ba054a9..4c38c89b 100644 --- a/examples/example2d1many.py +++ b/examples/example2d1many.py @@ -4,8 +4,8 @@ import numpy as np -#import PyGPU as gpuarray -from PyGPU import GPUArray, to_gpu +import PybindGPU.gpuarray as gpuarray +from PybindGPU.gpuarray import GPUArray, to_gpu #import pycuda.autoinit #from pycuda.gpuarray import GPUArray, to_gpu diff --git a/examples/example2d2many.py b/examples/example2d2many.py index ba98ba89..5bb3049f 100644 --- a/examples/example2d2many.py +++ b/examples/example2d2many.py @@ -4,9 +4,8 @@ import numpy as np -import PyGPU as gpuarray -from PyGPU.gpuarray import GPUArray, to_gpu - +import PybindGPU.gpuarray as gpuarray +from PybindGPU.gpuarray import GPUArray, to_gpu #import pycuda.autoinit #from pycuda.gpuarray import GPUArray, to_gpu diff --git a/python/cufinufft/tests/test_basic.py b/python/cufinufft/tests/test_basic.py index aff8997f..2c8f0dd5 100644 --- a/python/cufinufft/tests/test_basic.py +++ b/python/cufinufft/tests/test_basic.py @@ -1,7 +1,7 @@ import numpy as np -import PyGPU as gpuarray - +#import PybindGPU +import PybindGPU.gpuarray as gpuarray #import pycuda.autoinit # NOQA:401 #import pycuda.gpuarray as gpuarray diff --git a/python/cufinufft/tests/test_error_checks.py b/python/cufinufft/tests/test_error_checks.py index 62822721..86838e99 100644 --- a/python/cufinufft/tests/test_error_checks.py +++ b/python/cufinufft/tests/test_error_checks.py @@ -1,7 +1,7 @@ import numpy as np import pytest -import PyGPU as gpuarray +import PybindGPU.gpuarray as gpuarray #import pycuda.autoinit # NOQA:401 #import pycuda.gpuarray as gpuarray diff --git a/python/cufinufft/tests/test_multi.py b/python/cufinufft/tests/test_multi.py index a7a51033..11309ae5 100644 --- a/python/cufinufft/tests/test_multi.py +++ b/python/cufinufft/tests/test_multi.py @@ -2,7 +2,8 @@ import numpy as np -import PyGPU as gpuarray +import PybindGPU +import PybindGPU.gpuarray as gpuarray #import pycuda.driver as drv #import pycuda.gpuarray as gpuarray @@ -14,21 +15,24 @@ def test_multi_type1(dtype=np.float32, shape=(16, 16, 16), M=4096, tol=1e-3): complex_dtype = utils._complex_dtype(dtype) - drv.init() + #drv.init() - dev_count = drv.Device.count() + #dev_count = drv.Device.count() + dev_count, err = PybindGPU.cudaGetDeviceCount() if dev_count == 1: pytest.skip() - devs = [drv.Device(dev_id) for dev_id in range(dev_count)] + #devs = [drv.Device(dev_id) for dev_id in range(dev_count)] + devs = [PybindGPU.cudaDeviceProp(i) for i in range(dev_count)] dim = len(shape) errs = [] for dev_id, dev in enumerate(devs): - ctx = dev.make_context() + PybindGPU.cudaSetDevice(dev_id) + #ctx = dev.make_context() k = utils.gen_nu_pts(M, dim=dim).astype(dtype) c = utils.gen_nonuniform_data(M).astype(complex_dtype) @@ -55,7 +59,7 @@ def test_multi_type1(dtype=np.float32, shape=(16, 16, 16), M=4096, tol=1e-3): print(f'Type 1 relative error (GPU {dev_id}):', type1_rel_err) - ctx.pop() + #ctx.pop() errs.append(type1_rel_err) From 7bfa316a474c6bf2ce185e2317c98d08dab3be26 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 21 Apr 2023 18:11:03 -0400 Subject: [PATCH 11/11] Updates for ROCm 5.4.3. --- sites/make.inc.olcf_crusher | 6 +++--- src/precision_independent.cu | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sites/make.inc.olcf_crusher b/sites/make.inc.olcf_crusher index 9e0c4f30..5cfc33d2 100644 --- a/sites/make.inc.olcf_crusher +++ b/sites/make.inc.olcf_crusher @@ -1,9 +1,9 @@ # CC := hipcc CXX := hipcc NVCC := hipcc -NVARCH := --amdgpu-target=gfx90a -CXXFLAGS += -fPIC -DUSE_HIP -I$(ROCM_PATH)/hipfft/include +NVARCH := --offload-arch=gfx90a +CXXFLAGS += -fPIC -DUSE_HIP -I$(ROCM_PATH)/include NVCCFLAGS := -std=c++14 -O3 $(NVARCH) $(CXXFLAGS) DEVFLAGS := -fgpu-rdc INC := -LIBS := -fgpu-rdc -L$(ROCM_PATH)/hipfft/lib -lhipfft +LIBS := -fgpu-rdc -L$(ROCM_PATH)/lib -lhipfft diff --git a/src/precision_independent.cu b/src/precision_independent.cu index 11adb599..04350ef9 100644 --- a/src/precision_independent.cu +++ b/src/precision_independent.cu @@ -5,6 +5,7 @@ #include #include +#include #include "precision_independent.h"