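# gpuArrayExample.py
# Times host-to-device transfer, elementwise exp, and dot products on PyCUDA
# GPU arrays, with the equivalent NumPy calls timed for comparison.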

import pycuda.autoinit
import pycuda.driver as drv
import pycuda.gpuarray as gpuarray
import pycuda.cumath as cumath
import numpy as np

# Length of the large test vector: 134217728 == 2**27 elements.
n = np.int32(134217728)

# One pair of CUDA events is reused for every timed section below.
start = drv.Event()
end = drv.Event()

# Host-side inputs.  The 8-element vector is used for an "initial" run of
# each GPU operation before the full-size run.
x = np.random.normal(size=n)
x_short = np.random.normal(size=8)


def _report_elapsed(label, work):
    """Run ``work()`` between the two CUDA events and print the elapsed time.

    Args:
        label: Text printed in front of the measured time.
        work: Zero-argument callable containing the code to be timed.

    Returns:
        Whatever ``work()`` returns, so callers can keep the result.
    """
    start.record()
    result = work()
    end.record()
    # Block until the end event has completed so time_till() is valid.
    end.synchronize()
    # The 1e-3 factor turns the time_till() value into the seconds printed.
    print("%s: %fs" % (label, start.time_till(end) * 1e-3))
    return result


# Single-argument print(...) calls are used throughout: they produce the
# same output under the Python 2 print statement and the Python 3 function.
dev_x, dev_x_short = _report_elapsed(
    "Transfer to GPU time",
    lambda: (gpuarray.to_gpu(x), gpuarray.to_gpu(x_short)),
)


print("Timing vectorized exponentiation:")

# NOTE(review): the "(initial)" runs on the short vector presumably absorb
# one-time start-up cost of the GPU operation -- confirm.
dev_expx_short = _report_elapsed(
    "GPU array calc time (initial)", lambda: cumath.exp(dev_x_short)
)
dev_expx = _report_elapsed("GPU array calc time", lambda: cumath.exp(dev_x))

# NOTE(review): the CPU sections are also bracketed by CUDA events, as in
# the original script; a host-side timer may be more appropriate -- confirm.
exp_x = _report_elapsed("CPU calc time", lambda: np.exp(x))

print("Timing vectorized dot product/sum of squares:")

# The dot-product results are not kept; only the timings are reported.
_report_elapsed(
    "GPU array calc time (initial)",
    lambda: gpuarray.dot(dev_x_short, dev_x_short),
)
_report_elapsed("GPU array calc time", lambda: gpuarray.dot(dev_x, dev_x))
_report_elapsed("CPU calc time", lambda: np.dot(x, x))