import pycuda.driver as cuda import pycuda.autoinit from pycuda.compiler import SourceModule mod = SourceModule(""" __global__ void sin1d(float *x,float *y) { int idx = threadIdx.x + blockDim.x*blockIdx.x; y[idx] = sinf(powf(x[idx],2.0f)); } """) Nx = 128 x = np.linspace(-3,3,Nx).astype(np.float32) y = np.empty_like(x) func = mod.get_function("sin1d") func(cuda.In(x),cuda.Out(y),block=(Nx,1,1),grid=(1,1,1)) plot(x,y) import pycuda.driver as cuda import pycuda.autoinit from pycuda.compiler import SourceModule mod = SourceModule(""" __global__ void sin1da(float *y) { int idx = threadIdx.x + blockDim.x*blockIdx.x; float x = -3.0f+6.0f*float(idx)/blockDim.x; y[idx] = sinf(powf(x,2.0f)); } """) Nx = 128 x = np.linspace(-3,3,Nx).astype(np.float32) y = np.empty_like(x) func = mod.get_function("sin1da") func(cuda.Out(y),block=(Nx,1,1),grid=(1,1,1)) plot(x,y,'r') import pycuda.driver as cuda import pycuda.autoinit from pycuda.compiler import SourceModule mod = SourceModule(""" __global__ void sin2d(float *z) { int idx = threadIdx.x; int idy = blockIdx.x; int gid = idx + blockDim.x*idy; float x = -4.0f+6.0f*float(idx)/blockDim.x; float y = -3.0f+6.0f*float(idy)/gridDim.x; z[gid] = sinf(powf(x,2.0f)+powf(y,2.0f)); } """) Nx = 128 Ny = 64 x = np.linspace(-4,2,Nx).astype(np.float32) y = np.linspace(-3,3,Ny).astype(np.float32) XX,YY = np.meshgrid(x,y) z = np.zeros(Nx*Ny).astype(np.float32) func = mod.get_function("sin2d") func(cuda.Out(z),block=(Nx,1,1),grid=(Ny,1,1)) contourf(XX,YY,z.reshape(Ny,Nx) ) contourf(XX,YY,np.sin(XX**2+YY**2)) import pycuda.driver as cuda import pycuda.autoinit from pycuda.compiler import SourceModule mod = SourceModule(""" __global__ void sin2da(float *z) { int ix = threadIdx.x + blockIdx.x * blockDim.x; int iy = threadIdx.y + blockIdx.y * blockDim.y; int gid = ix + iy * blockDim.x * gridDim.x; float x = -4.0f+6.0f*float(ix)/(blockDim.x*gridDim.x); float y = -3.0f+6.0f*float(iy)/(blockDim.y*gridDim.y); z[gid] = sinf(powf(x,2.0f)+powf(y,2.0f)); } """) block_size = 4 Nx = 32*block_size Ny = 32*block_size x = np.linspace(-4,2,Nx).astype(np.float32) y = np.linspace(-3,3,Ny).astype(np.float32) XX,YY = np.meshgrid(x,y) z = np.zeros(Nx*Ny).astype(np.float32) func = mod.get_function("sin2da") func(cuda.Out(z),\ block=(block_size,block_size,1),\ grid=(Nx/block_size,Ny/block_size,1) ) contourf(XX,YY,z.reshape(Ny,Nx) )