Skip to content

Commit

Permalink
Use stream in mul_add if given and allocator in subset_sum (#438)
Browse files Browse the repository at this point in the history
* mul_add(): use stream if given, add an optional destination array.
subset_sum(): use allocator if given.

* Remove unused import, add whitespace to make flake8 happy

* Update pycuda/gpuarray.py

Co-authored-by: Andreas Klöckner <inform@tiker.net>

* test_subset_sum: also assert if allocator is used

* subset_sum: use given allocator value (even if None) like other functions

---------

Co-authored-by: Andreas Klöckner <inform@tiker.net>
  • Loading branch information
vincefn and inducer authored May 8, 2024
1 parent 795ec35 commit 8aa0766
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 4 deletions.
8 changes: 4 additions & 4 deletions pycuda/gpuarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,10 +574,10 @@ def _new_like_me(self, dtype=None, order="C"):
)

# operators ---------------------------------------------------------------
def mul_add(self, selffac, other, otherfac, add_timer=None, stream=None):
def mul_add(self, selffac, other, otherfac, add_timer=None, stream=None, out=None):
"""Return `selffac * self + otherfac*other`."""
result = self._new_like_me(_get_common_dtype(self, other))
return self._axpbyz(selffac, other, otherfac, result, add_timer)
result = out if out is not None else self._new_like_me(_get_common_dtype(self, other))
return self._axpbyz(selffac, other, otherfac, result, add_timer, stream=stream)

def __add__(self, other):
"""Add an array with an array or an array with a scalar."""
Expand Down Expand Up @@ -2087,7 +2087,7 @@ def subset_sum(subset, a, dtype=None, stream=None, allocator=None):
from pycuda.reduction import get_subset_sum_kernel

krnl = get_subset_sum_kernel(dtype, subset.dtype, a.dtype)
return krnl(subset, a, stream=stream)
return krnl(subset, a, stream=stream, allocator=allocator)


def dot(a, b, dtype=None, stream=None, allocator=None):
Expand Down
64 changes: 64 additions & 0 deletions test/test_gpuarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,26 @@ def test_multiply_array(self):
b_mul_c = (b_gpu * c_gpu).get()
assert (b * c == b_mul_c).all()

def test_mul_add(self):
    """Check ``mul_add`` against the NumPy result of ``2 * a + 3 * b``.

    Three call forms are exercised: the default call, a call that writes
    into a caller-supplied ``out`` array, and a call that is given an
    explicit stream.
    """
    a = np.arange(1, 11, dtype=np.float32)
    b = np.arange(10, 101, 10, dtype=np.float32)
    expected = 2 * a + 3 * b

    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)

    # Default call: the result array is created by mul_add itself.
    plain = a_gpu.mul_add(2, b_gpu, 3).get()
    assert (expected == plain).all()

    # Caller-provided destination: the result must land in out_gpu.
    out_gpu = gpuarray.empty_like(a_gpu)
    a_gpu.mul_add(2, b_gpu, 3, out=out_gpu)
    assert (expected == out_gpu.get()).all()

    # Explicit stream handed through to mul_add.
    stream = drv.Stream()
    streamed = a_gpu.mul_add(2, b_gpu, 3, stream=stream).get()
    assert (expected == streamed).all()

def test_unit_multiply_array(self):

a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32)
Expand Down Expand Up @@ -862,6 +882,50 @@ def test_subset_minmax(self):

assert min_a_gpu == min_a

def test_subset_sum(self):
    """Test subset sum with and without an allocator.

    For every supported dtype, the sum over a fixed subset of indices is
    computed on the GPU and compared with the NumPy result. The run is
    repeated once with ``allocator=None`` and once with a counting wrapper
    around a ``DeviceMemoryPool``; in the latter case the wrapper must have
    been called exactly once by ``subset_sum``.
    """
    import pycuda.tools

    l_a = 2000
    gran = 5

    if has_double_support():
        dtypes = [np.float64, np.float32, np.int32]
    else:
        dtypes = [np.float32, np.int32]

    # Index 0 plus every index that is not a multiple of ``gran``. The
    # selection depends on neither dtype nor pool, so it is built (on the
    # host) and uploaded once instead of once per loop iteration.
    meaningful_indices = np.array(
        [i for i in range(l_a) if i == 0 or i % gran != 0],
        dtype=np.int32)
    meaningful_indices_gpu = gpuarray.to_gpu(meaningful_indices)

    for pool in [None, pycuda.tools.DeviceMemoryPool()]:
        for dtype in dtypes:
            a = np.random.uniform(0, 10, l_a).astype(dtype)
            a_gpu = gpuarray.to_gpu(a)

            sum_a = a[meaningful_indices].sum()

            alloc_uses = 0

            def allocator(size):
                # Count calls so the test can tell whether subset_sum
                # really used the allocator it was given. ``pool`` is only
                # read here, so it needs no ``nonlocal`` declaration.
                nonlocal alloc_uses
                alloc_uses += 1
                return pool.allocate(size)

            alloc = None if pool is None else allocator
            sum_a_gpu = gpuarray.subset_sum(
                meaningful_indices_gpu, a_gpu, allocator=alloc).get()
            assert np.allclose(sum_a_gpu, sum_a)
            if pool is not None:
                assert alloc_uses == 1

@pytest.mark.parametrize("sz", [2,
3,
4,
Expand Down

0 comments on commit 8aa0766

Please sign in to comment.