Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add test for reverse slicing #162

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions pycuda/elementwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,11 +495,15 @@ def get_fill_kernel(dtype):
@context_dependent_memoize
def get_reverse_kernel(dtype):
    """Return the element-wise kernel that copies *y* into *z* in reverse order.

    The generated kernel takes three explicit arguments:

    * ``y`` -- input buffer of element type *dtype*,
    * ``z`` -- output buffer of element type *dtype*,
    * ``skip`` -- distance between consecutive input elements, counted in
      elements; only its magnitude is used.

    Output element ``i`` is read from ``y[(n - 1 - i) * |skip|]``, so a
    ``skip`` of 1 gives the plain ``z[i] = y[n-1-i]`` reversal.

    NOTE(review): ``n`` and ``i`` are supplied by the wrapper that
    ``get_elwise_kernel`` generates (not visible in this block) -- confirm
    their declared types there.
    """
    return get_elwise_kernel(
            "%(tp)s *y, %(tp)s *z, int skip" % {
                "tp": dtype_to_ctype(dtype),
                },
            # Widen to size_t *before* multiplying so the index of the last
            # input element is not computed in a narrower integer type.
            """
            skip = abs(skip);
            size_t N = (size_t)(n-1)*skip;
            z[i] = y[N-(size_t)i*skip];
            """,
            "reverse")


@context_dependent_memoize
Expand Down
31 changes: 22 additions & 9 deletions pycuda/gpuarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,16 +674,13 @@ def reverse(self, stream=None):
as one-dimensional.
"""

if not self.flags.forc:
raise RuntimeError("only contiguous arrays may "
"be used as arguments to this operation")

result = self._new_like_me()

func = elementwise.get_reverse_kernel(self.dtype)
skip = self.strides[0] // self.dtype.itemsize
func.prepared_async_call(self._grid, self._block, stream,
self.gpudata, result.gpudata,
self.mem_size)
self.gpudata, result.gpudata,
skip, self.mem_size)

return result

Expand Down Expand Up @@ -856,13 +853,24 @@ def __getitem__(self, index):
start, stop, idx_stride = index_entry.indices(
self.shape[array_axis])

# number of elements
n = (abs(stop-start)-1) // idx_stride + 1

if idx_stride < 0:
# compute number of elements
n = (abs(stop - start) + abs(idx_stride) - 1) // abs(idx_stride)
# compute boundaries
stop = start + 1
start = stop - (n - 1) * abs(idx_stride) - 1

array_stride = self.strides[array_axis]

new_shape.append((stop-start-1)//idx_stride+1)
new_shape.append(n)
new_strides.append(idx_stride*array_stride)
new_offset += array_stride*start

index_axis += 1

array_axis += 1

elif isinstance(index_entry, (int, np.integer)):
Expand Down Expand Up @@ -910,14 +918,19 @@ def __getitem__(self, index):

array_axis += 1

return GPUArray(
tmp = GPUArray(
shape=tuple(new_shape),
dtype=self.dtype,
allocator=self.allocator,
base=self,
gpudata=int(self.gpudata)+new_offset,
strides=tuple(new_strides))

if new_strides[0] < 0:
tmp = tmp.reverse()

return tmp

def __setitem__(self, index, value):
_memcpy_discontig(self[index], value)

Expand Down Expand Up @@ -1297,7 +1310,7 @@ def _memcpy_discontig(dst, src, async=False, stream=None):

copy.width_in_bytes = src.dtype.itemsize*shape[0]

copy.src_pitch = src_strides[1]
copy.src_pitch = abs(src_strides[1])
copy.dst_pitch = dst_strides[1]
copy.height = shape[1]

Expand Down
24 changes: 23 additions & 1 deletion test/test_gpuarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ def test_reverse(self):
b = a_cpu.get()

for i in range(0, 10):
assert a[len(a)-1-i] == b[i]
assert a[len(a)-1-i] == b[i], "%s, %s" % (a[len(a)-1-i], b[i])

@mark_cuda_test
def test_sum(self):
Expand Down Expand Up @@ -572,6 +572,28 @@ def test_slice(self):

assert la.norm(a_gpu_slice.get()-a_slice) == 0

@mark_cuda_test
def test_reverse_slice(self):
    """Compare negative-step slices of a 1-D GPU array with the host result.

    A random 10-element array is created on the device and fetched with
    ``.get()``. For 200 random ``(end, start, step)`` triples, the slice
    ``[end:start:-step]`` is taken from both the device array and the
    fetched copy, and the two results must be identical.
    """
    from random import randrange

    from pycuda.curandom import rand as curand

    length = 10
    a_gpu = curand((length,))
    a = a_gpu.get()

    for _ in range(200):
        # Draw end >= start + 2 directly: randrange(1, end - start) below
        # needs a non-empty range, i.e. end - start of at least 2.
        start = randrange(length - 2)
        end = randrange(start + 2, length)

        step = randrange(1, min(end - start, 20))

        a_gpu_slice = a_gpu[end:start:-step]
        a_slice = a[end:start:-step]

        # Report the offending slice: the inputs are random, so a bare
        # assertion failure could not be reproduced.
        assert la.norm(a_gpu_slice.get() - a_slice) == 0, (
                "mismatch for slice [%d:%d:%d]" % (end, start, -step))

@mark_cuda_test
def test_2d_slice_c(self):
from pycuda.curandom import rand as curand
Expand Down