#!/usr/bin/env python
"""
Demonstrates (1) how to use the PyCUDA interface to CUFFT to compute 1D
FFTs and (2) multiplication of matrices/vectors on the GPU, each checked
against the corresponding numpy computation.

NOTE(review): this file was originally a raw IPython-notebook paste.  The
cell magics (%load, %%time) and leftover exploratory expressions
(e.g. ``x_gpu.get()[1234]``) have been removed so the file parses and runs
as plain Python.  Timing, if desired, should be done with ``timeit`` or an
external profiler.
"""

import numpy as np

import pycuda.autoinit
import pycuda.driver as drv      # unused here; kept from the original paste
import pycuda.gpuarray as gpuarray

import scikits.cuda.fft as cu_fft
import scikits.cuda.linalg as culinalg
import scikits.cuda.misc as cumisc


def demo_fft():
    """1D real-to-complex forward/inverse FFT plus an in-place complex
    round-trip, each validated against numpy.fft."""
    print('Testing fft/ifft..')
    N = 4096 * 16000

    # CPU reference: forward FFT then inverse, keeping the real part.
    x = np.asarray(np.random.rand(N), np.float32)
    xf = np.fft.fft(x)
    y = np.real(np.fft.ifft(xf))

    x_gpu = gpuarray.to_gpu(x)
    # R2C transform of N real samples yields N//2 + 1 complex bins.
    # Floor division: N/2 would be a float under true division.
    xf_gpu = gpuarray.empty(N // 2 + 1, np.complex64)
    plan_forward = cu_fft.Plan(x_gpu.shape, np.float32, np.complex64)
    cu_fft.fft(x_gpu, xf_gpu, plan_forward)

    y_gpu = gpuarray.empty_like(x_gpu)
    plan_inverse = cu_fft.Plan(x_gpu.shape, np.complex64, np.float32)
    # Final True asks cu_fft.ifft to scale the result by 1/N.
    cu_fft.ifft(xf_gpu, y_gpu, plan_inverse, True)

    print('Success status: ', np.allclose(y, y_gpu.get(), atol=1e-6))

    print('Testing in-place fft..')
    x = np.asarray(np.random.rand(N) + 1j * np.random.rand(N), np.complex64)
    x_gpu = gpuarray.to_gpu(x)

    # C2C plan can be reused for both directions; source == destination
    # makes the transform in-place.
    plan = cu_fft.Plan(x_gpu.shape, np.complex64, np.complex64)
    cu_fft.fft(x_gpu, x_gpu, plan)
    cu_fft.ifft(x_gpu, x_gpu, plan, True)

    print('Success status: ', np.allclose(x, x_gpu.get(), atol=1e-6))


def demo_dot():
    """Chained matrix products and a vector dot product on the GPU,
    validated against numpy.dot, for every dtype the device supports."""
    culinalg.init()

    # Double precision is only supported by devices with compute
    # capability >= 1.3:
    demo_types = [np.float32, np.complex64]
    if cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
        demo_types.extend([np.float64, np.complex128])

    for t in demo_types:
        print('Testing matrix multiplication for type ' + str(np.dtype(t)))
        if np.iscomplexobj(t()):
            a = np.asarray(np.random.rand(10, 5) + 1j * np.random.rand(10, 5), t)
            b = np.asarray(np.random.rand(5, 5) + 1j * np.random.rand(5, 5), t)
            c = np.asarray(np.random.rand(5, 5) + 1j * np.random.rand(5, 5), t)
        else:
            a = np.asarray(np.random.rand(10, 5), t)
            b = np.asarray(np.random.rand(5, 5), t)
            c = np.asarray(np.random.rand(5, 5), t)

        a_gpu = gpuarray.to_gpu(a)
        b_gpu = gpuarray.to_gpu(b)
        c_gpu = gpuarray.to_gpu(c)

        # (a @ b) @ c, freeing the intermediate explicitly to keep GPU
        # memory use down rather than waiting for garbage collection.
        temp_gpu = culinalg.dot(a_gpu, b_gpu)
        d_gpu = culinalg.dot(temp_gpu, c_gpu)
        temp_gpu.gpudata.free()
        del temp_gpu

        print('Success status: ',
              np.allclose(np.dot(np.dot(a, b), c), d_gpu.get()))

        print('Testing vector multiplication for type ' + str(np.dtype(t)))
        if np.iscomplexobj(t()):
            d = np.asarray(np.random.rand(5) + 1j * np.random.rand(5), t)
            e = np.asarray(np.random.rand(5) + 1j * np.random.rand(5), t)
        else:
            d = np.asarray(np.random.rand(5), t)
            e = np.asarray(np.random.rand(5), t)

        d_gpu = gpuarray.to_gpu(d)
        e_gpu = gpuarray.to_gpu(e)

        # Vector-vector culinalg.dot returns a host scalar.
        temp = culinalg.dot(d_gpu, e_gpu)
        print('Success status: ', np.allclose(np.dot(d, e), temp))


if __name__ == '__main__':
    demo_fft()
    demo_dot()