Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cuda array interface #326

Merged
merged 37 commits into from
Sep 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
7b07d0f
python: add hackish way to support more than pycuda arrays
blackwer Jul 25, 2023
71fa58a
cuda-python: remove unused import
blackwer Jul 25, 2023
98efe74
cuda-python: make gpu array handling slightly less hacky
blackwer Jul 25, 2023
6dd63cb
cuda: revert changes to exception strings
blackwer Jul 26, 2023
e2cbfa2
cuda-python: check if array is none rather than its truthiness
blackwer Jul 26, 2023
f8b04e3
cuda-python: handle torch arrays with gradient enabled
blackwer Jul 26, 2023
6f984cb
cuda-python: simplify compatibility layer
janden Aug 22, 2023
1c437c2
cuda-python: factor out compatibility layer
janden Aug 22, 2023
926b3ad
cuda-python: missed torch import
janden Aug 22, 2023
be2411d
cuda-python: multiple frameworks in unit tests
janden Aug 22, 2023
ae39257
tests: add other frameworks to Jenkinsfile
janden Aug 22, 2023
0154868
tests: install all CUDA libraries
janden Aug 22, 2023
9eebe55
tests: hardcode compute capability
janden Aug 22, 2023
4244540
tests: install latest torch version for cu110
janden Aug 22, 2023
15e3d0f
cuda-python: use `torch.as_tensor`
janden Aug 22, 2023
8371776
cuda-python: copy if not contiguous
janden Aug 23, 2023
8df027a
cuda-python: fix bug in Plan
janden Aug 23, 2023
ab64862
tests: test non-contiguous arrays
janden Aug 23, 2023
baa863b
tests: add `--framework` opt to pytest
janden Aug 24, 2023
593abf0
tests: update Jenkins to run different frameworks
janden Aug 24, 2023
79df4a3
tests: check for ordering error in pycuda
janden Aug 24, 2023
a21dcca
tests: remove `test_type2_ordering`
janden Aug 24, 2023
680d293
tests: move `transfer_funcs` into `util`
janden Aug 24, 2023
6aaea6b
tests: parametrize `test_opts` by framework
janden Aug 24, 2023
d046c75
tests: `test_type1_ordering` for other frameworks
janden Aug 24, 2023
9387e02
cuda-python: fix bug in torch dimension check
janden Aug 24, 2023
709791e
tests: introduce `to_gpu` and `to_cpu` fixtures
janden Aug 24, 2023
f31cfd0
tests: parametrize `test_error_checks`
janden Aug 24, 2023
3e32e01
tests: parametrize `test_multi`
janden Aug 24, 2023
d9c3036
cuda-python: rename examples
janden Aug 24, 2023
abb43c3
tests: only run examples from approved frameworks
janden Aug 24, 2023
97b0e21
cuda-python: make `_simple` torch-compatible
janden Aug 24, 2023
eb153f8
tests: fix wrong dtype check
janden Aug 24, 2023
56fad53
tests: parametrize simple interfaces tests
janden Aug 24, 2023
3911ebc
cuda-python: remove pycuda from requirements
janden Aug 24, 2023
8f2a643
Constrain jenkins to use v100 GPUs
blackwer Aug 24, 2023
e148b7d
cuda: jenkins: build with cuda arch 70 (v100)
blackwer Aug 24, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pipeline {
dockerfile {
filename 'tools/cufinufft/docker/cuda11.0/Dockerfile-x86_64'
args '--gpus 2'
label 'v100'
}
}
environment {
Expand All @@ -27,7 +28,9 @@ pipeline {
echo $HOME
'''
sh '''#!/bin/bash -ex
cuda_arch=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader|head -n 1| sed "s/\\.//")
# Oldest card in the Jenkins pool is a K40
cuda_arch="70"

cmake -B build . -DFINUFFT_USE_CUDA=ON \
-DFINUFFT_USE_CPU=OFF \
-DFINUFFT_BUILD_TESTS=ON \
Expand All @@ -44,9 +47,14 @@ pipeline {
sh '''#!/bin/bash -ex
source $HOME/bin/activate
python3 -m pip install --upgrade pip
python3 -m pip install --upgrade pycuda cupy-cuda110 numba
python3 -m pip install torch==1.7.1+cu110 -f https://download.pytorch.org/whl/torch_stable.html
python3 -m pip install -e python/cufinufft
python3 -m pip install pytest
python3 -m pytest python/cufinufft
python3 -m pytest --framework=pycuda python/cufinufft
python3 -m pytest --framework=numba python/cufinufft
python3 -m pytest --framework=cupy python/cufinufft
python3 -m pytest --framework=torch python/cufinufft
'''
}
}
Expand Down
106 changes: 106 additions & 0 deletions python/cufinufft/cufinufft/_compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import inspect

import numpy as np


def get_array_ptr(data):
    """Return the device pointer of a GPU array.

    The pointer is the first element of the ``data`` entry of the
    object's ``__cuda_array_interface__`` mapping.

    Raises:
        TypeError: if reading the interface raises ``AttributeError``.
    """
    try:
        interface = data.__cuda_array_interface__
        pointer = interface['data'][0]
    except RuntimeError:
        # Handle torch with gradient enabled
        # https://github.com/flatironinstitute/finufft/pull/326#issuecomment-1652212770
        return data.data_ptr()
    except AttributeError:
        raise TypeError("Invalid GPU array implementation. Implementation must implement the standard cuda array interface.")
    else:
        return pointer


def get_array_module(obj):
    """Classify a GPU array by the framework its type is defined in.

    The decision is based on the name of the module that defines
    ``type(obj)``.

    Returns:
        ``"numba"`` if the module name starts with ``"numba.cuda"``,
        ``"torch"`` if it starts with ``"torch"``, ``"pycuda"`` if it
        starts with ``"pycuda"``, and ``"generic"`` otherwise.
    """
    array_type = type(obj)
    module = inspect.getmodule(array_type)

    if module is not None:
        module_name = module.__name__
    else:
        # inspect.getmodule() returns None when it cannot determine the
        # defining module (e.g. it is not present in sys.modules). Fall
        # back to the name recorded on the class instead of failing with
        # AttributeError on ``None.__name__``.
        module_name = getattr(array_type, "__module__", None)

    if not isinstance(module_name, str):
        module_name = ""

    if module_name.startswith("numba.cuda"):
        return "numba"
    elif module_name.startswith("torch"):
        return "torch"
    elif module_name.startswith("pycuda"):
        return "pycuda"
    else:
        return "generic"


def get_array_size(obj):
    """Return the total number of elements in a GPU array.

    Torch tensors are handled separately from the other frameworks,
    which are read through their ``size`` attribute.
    """
    if get_array_module(obj) == "torch":
        # numel() counts elements across all dimensions, matching what
        # ``.size`` reports in the other branch. len() would only give the
        # length of the first dimension (and fails on 0-d tensors); for
        # 1-D tensors the two agree.
        return obj.numel()

    return obj.size


def get_array_dtype(obj):
    """Return the dtype of a GPU array.

    For torch tensors the dtype's string form is stripped of its leading
    ``"torch."`` characters and converted to a numpy dtype. For every
    other framework ``obj.dtype`` is returned unchanged.
    """
    if get_array_module(obj) != "torch":
        return obj.dtype

    # Drop the first len("torch.") characters so numpy can parse the rest.
    torch_name = str(obj.dtype)
    prefix_length = len("torch.")
    return np.dtype(torch_name[prefix_length:])


def is_array_contiguous(obj):
    """Report whether a GPU array is C-contiguous.

    The query used depends on the framework: ``is_c_contiguous()`` for
    numba, ``is_contiguous()`` for torch, and ``flags.c_contiguous`` for
    everything else.
    """
    framework = get_array_module(obj)

    if framework == "numba":
        return obj.is_c_contiguous()

    if framework == "torch":
        return obj.is_contiguous()

    return obj.flags.c_contiguous


def array_can_contiguous(obj):
    """Report whether ``obj`` can be copied into contiguous layout.

    Returns ``False`` for pycuda arrays and ``True`` for arrays from any
    other framework.
    """
    return get_array_module(obj) != "pycuda"


def array_contiguous(obj):
    """Return a contiguous copy of a GPU array.

    - numba: a new device array with the same shape, dtype and stream is
      allocated and the contents of ``obj`` are assigned into it.
    - torch: ``obj.contiguous()``.
    - anything else: ``obj.copy(order="C")``.
    """
    array_module = get_array_module(obj)

    if array_module == "numba":
        # Import the submodule explicitly (as array_empty_like does).
        # A bare ``import numba`` only exposes ``numba.cuda`` if something
        # else has already loaded it.
        import numba.cuda
        ret = numba.cuda.device_array(obj.shape, obj.dtype, stream=obj.stream)
        ret[:] = obj[:]
        return ret
    elif array_module == "torch":
        return obj.contiguous()
    else:
        return obj.copy(order="C")


def array_empty_like(obj, *args, **kwargs):
    """Allocate an uninitialised array in the same framework as ``obj``.

    ``args`` and ``kwargs`` are forwarded to the framework's allocator:

    - numba: ``numba.cuda.device_array``.
    - torch: ``torch.empty``, after renaming a ``shape`` keyword to
      ``size``, mapping numpy ``complex64``/``complex128`` dtypes to their
      torch counterparts, and defaulting ``device`` to ``obj.device``.
    - anything else: the constructor ``type(obj)``.
    """
    framework = get_array_module(obj)

    if framework == "numba":
        import numba.cuda
        return numba.cuda.device_array(*args, **kwargs)

    if framework != "torch":
        return type(obj)(*args, **kwargs)

    import torch

    if "shape" in kwargs:
        kwargs["size"] = kwargs.pop("shape")

    if "dtype" in kwargs:
        requested = kwargs.pop("dtype")
        if requested == np.complex64:
            requested = torch.complex64
        elif requested == np.complex128:
            requested = torch.complex128
        kwargs["dtype"] = requested

    if "device" not in kwargs:
        kwargs["device"] = obj.device

    return torch.empty(*args, **kwargs)
2 changes: 0 additions & 2 deletions python/cufinufft/cufinufft/_cufinufft.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
warnings.filterwarnings("ignore", category=DeprecationWarning)
import imp

import numpy as np

from ctypes import c_double
from ctypes import c_int
from ctypes import c_int64
Expand Down
47 changes: 21 additions & 26 deletions python/cufinufft/cufinufft/_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from cufinufft._cufinufft import _destroy_plan
from cufinufft._cufinufft import _destroy_planf

from pycuda.gpuarray import GPUArray
from cufinufft import _compat


# If we are shutting down python, we don't need to run __del__
Expand Down Expand Up @@ -206,7 +206,7 @@ def setpts(self, x, y=None, z=None, s=None, t=None, u=None):

_x, _y, _z = _ensure_valid_pts(_x, _y, _z, self.dim)

M = _x.size
M = _compat.get_array_size(_x)

# Because FINUFFT/cufinufft are internally column major,
# we will reorder the pts axes. Reordering references
Expand All @@ -217,17 +217,17 @@ def setpts(self, x, y=None, z=None, s=None, t=None, u=None):
# (x, y, None) ~> (y, x, None)
# (x, y, z) ~> (z, y, x)
# Via code, we push each dimension onto a stack of axis
fpts_axes = [_x.ptr, None, None]
fpts_axes = [_compat.get_array_ptr(_x), None, None]

# We will also store references to these arrays.
# This keeps python from prematurely cleaning them up.
self._references.append(_x)
if self.dim >= 2:
fpts_axes.insert(0, _y.ptr)
fpts_axes.insert(0, _compat.get_array_ptr(_y))
self._references.append(_y)

if self.dim >= 3:
fpts_axes.insert(0, _z.ptr)
fpts_axes.insert(0, _compat.get_array_ptr(_z))
self._references.append(_z)

# Then take three items off the stack as our reordered axis.
Expand Down Expand Up @@ -278,14 +278,16 @@ def execute(self, data, out=None):
req_out_shape = batch_shape + req_out_shape

if out is None:
_out = GPUArray(req_out_shape, dtype=self.dtype)
_out = _compat.array_empty_like(_data, req_out_shape, dtype=self.dtype)
else:
_out = _ensure_array_shape(_out, "out", req_out_shape)

if self.type == 1:
ier = self._exec_plan(self._plan, data.ptr, _out.ptr)
ier = self._exec_plan(self._plan, _compat.get_array_ptr(_data),
_compat.get_array_ptr(_out))
elif self.type == 2:
ier = self._exec_plan(self._plan, _out.ptr, data.ptr)
ier = self._exec_plan(self._plan, _compat.get_array_ptr(_out),
_compat.get_array_ptr(_data))

if ier != 0:
raise RuntimeError('Error executing plan.')
Expand Down Expand Up @@ -315,27 +317,21 @@ def __del__(self):

def _ensure_array_type(x, name, dtype, output=False):
if x is None:
return GPUArray(0, dtype=dtype, order="C")
return None

if x.dtype != dtype:
if _compat.get_array_dtype(x) != dtype:
raise TypeError(f"Argument `{name}` does not have the correct dtype: "
f"{x.dtype} was given, but {dtype} was expected.")

if not x.flags.c_contiguous:
if output:
if not _compat.is_array_contiguous(x):
if output or not _compat.array_can_contiguous(x):
raise TypeError(f"Argument `{name}` does not satisfy the "
f"following requirement: C")
else:
raise TypeError(f"Argument `{name}` does not satisfy the "
f"following requirement: C")

# Ideally we'd copy the array into the correct ordering here, but
# this does not seem possible as of pycuda 2022.2.2.

# warnings.warn(f"Argument `{name}` does not satisfy the "
# f"following requirement: C. Copying array (this may
# reduce performance)")
# x = gpuarray.GPUArray(x, dtype=dtype, order="C")
warnings.warn(f"Argument `{name}` does not satisfy the "
f"following requirement: C. Copying array "
f"(this may reduce performance)")
x = _compat.array_contiguous(x)

return x

Expand All @@ -354,22 +350,21 @@ def _ensure_array_shape(x, name, shape, allow_reshape=False):
else:
return x


def _ensure_valid_pts(x, y, z, dim):
if x.ndim != 1:
raise TypeError(f"Argument `x` must be a vector")

M = x.size

if dim >= 2:
y = _ensure_array_shape(y, "y", x.shape)

if dim >= 3:
z = _ensure_array_shape(z, "z", x.shape)

if dim < 3 and z.size > 0:
if dim < 3 and z is not None and _compat.get_array_size(z) > 0:
raise TypeError(f"Plan dimension is {dim}, but `z` was specified")

if dim < 2 and y.size > 0:
if dim < 2 and y is not None and _compat.get_array_size(y) > 0:
raise TypeError(f"Plan dimension is {dim}, but `y` was specified")

return x, y, z
4 changes: 2 additions & 2 deletions python/cufinufft/cufinufft/_simple.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from cufinufft import Plan
from cufinufft import Plan, _compat

def nufft1d1(x, data, n_modes=None, out=None, eps=1e-6, isign=1, **kwargs):
return _invoke_plan(1, 1, x, None, None, data, out, isign, eps, n_modes,
Expand All @@ -24,7 +24,7 @@ def nufft3d2(x, y, z, data, out=None, eps=1e-6, isign=-1, **kwargs):

def _invoke_plan(dim, nufft_type, x, y, z, data, out, isign, eps,
n_modes=None, kwargs=None):
dtype = data.dtype
dtype = _compat.get_array_dtype(data)

n_trans = _get_ntrans(dim, nufft_type, data)

Expand Down
1 change: 0 additions & 1 deletion python/cufinufft/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
numpy
pycuda
six
24 changes: 24 additions & 0 deletions python/cufinufft/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import pytest

import utils


def pytest_addoption(parser):
    """Register the ``--framework`` command-line option.

    The option uses the ``append`` action, so each occurrence on the
    command line adds one entry to the resulting list.
    """
    option_settings = dict(
        action="append",
        default=[],
        help="List of frameworks",
    )
    parser.addoption("--framework", **option_settings)

def pytest_generate_tests(metafunc):
    """Parametrize the ``framework`` fixture from the command line.

    Tests that do not request ``framework`` are left untouched; the rest
    are parametrized over the values collected for the ``framework``
    option.
    """
    if "framework" not in metafunc.fixturenames:
        return

    selected = metafunc.config.getoption("framework")
    metafunc.parametrize("framework", selected)

@pytest.fixture
def to_gpu(framework):
    """Fixture yielding the first callable of ``utils.transfer_funcs``.

    ``utils.transfer_funcs(framework)`` returns a pair; this fixture
    exposes its first element (presumably the host-to-device transfer
    for the selected framework -- see ``utils``).
    """
    upload, _unused_download = utils.transfer_funcs(framework)
    return upload


@pytest.fixture
def to_cpu(framework):
    """Fixture yielding the second callable of ``utils.transfer_funcs``.

    ``utils.transfer_funcs(framework)`` returns a pair; this fixture
    exposes its second element (presumably the device-to-host transfer
    for the selected framework -- see ``utils``).
    """
    _unused_upload, download = utils.transfer_funcs(framework)
    return download
46 changes: 6 additions & 40 deletions python/cufinufft/tests/test_array_ordering.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,60 +2,26 @@

import numpy as np

import pycuda.autoinit # NOQA:401
import pycuda.gpuarray as gpuarray

from cufinufft import Plan
from cufinufft import Plan, _compat

import utils

def test_type2_ordering(dtype=np.float32, shape=(16, 16, 16), M=4096, tol=1e-3):
    """Check that a type 2 plan rejects input that is not C-ordered.

    The plan is first executed on the data as generated, then on a
    re-laid-out copy of the same values, which must raise ``TypeError``.
    """
    complex_dtype = utils._complex_dtype(dtype)

    pts = utils.gen_nu_pts(M).astype(dtype)
    coeffs = utils.gen_uniform_data(shape).astype(complex_dtype)

    # Transpose, copy, transpose back: same shape and values as `coeffs`,
    # but with the memory layout that the check below expects to be
    # rejected.
    coeffs_tt = coeffs.T.copy().T

    pts_gpu = gpuarray.to_gpu(pts)
    coeffs_gpu = gpuarray.to_gpu(coeffs)
    coeffs_tt_gpu = gpuarray.to_gpu(coeffs_tt)

    plan = Plan(2, shape, eps=tol, dtype=complex_dtype)
    plan.setpts(pts_gpu[0], pts_gpu[1], pts_gpu[2])

    plan.execute(coeffs_gpu)

    with pytest.raises(TypeError, match="following requirement: C"):
        plan.execute(coeffs_tt_gpu)

    # Ideally the re-laid-out input would be accepted and its output would
    # match the output for `coeffs` (np.allclose with rtol=1e-2), but that
    # currently does not look possible.


def test_type1_ordering(dtype=np.float32, shape=(16, 16, 16), M=4096, tol=1e-3):
def test_type1_ordering(to_gpu, to_cpu, dtype=np.float32, shape=(16, 16, 16), M=4096, tol=1e-3):
complex_dtype = utils._complex_dtype(dtype)

k, c = utils.type1_problem(dtype, shape, M)

k_gpu = gpuarray.to_gpu(k)
c_gpu = gpuarray.to_gpu(c)
k_gpu = to_gpu(k)
c_gpu = to_gpu(c)

plan = Plan(1, shape, eps=tol, dtype=complex_dtype)

plan.setpts(*k_gpu)

out_gpu = gpuarray.GPUArray(shape, dtype=complex_dtype)

plan.execute(c_gpu, out=out_gpu)
out = np.empty(shape, dtype=complex_dtype, order="F")

out_gpu = gpuarray.GPUArray(shape, dtype=complex_dtype, order="F")
out_gpu = to_gpu(out)

with pytest.raises(TypeError, match="following requirement: C") as err:
plan.execute(c_gpu, out=out_gpu)
Loading