Lecturer: Álvaro Leitao - A.Leitao_at_cwi.nl
cd ./PyCUDA/
/home/jpsilva/Lisbon1214/notebooks/PyCUDA
import os
# Show the entries of the current working directory.
os.listdir(os.curdir)
['pycuda_montecarlo_pi_GPUArrays.py', 'pycuda_montecarlo_pi.py', 'pycuda_sumarrays.py']
%load pycuda_montecarlo_pi.py
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import numpy
import time
C_cuda_code = SourceModule("""
__global__ void MC_Pi(float *U1, float *U2, float *counter)
{
const int i = blockIdx.x*blockDim.x + threadIdx.x;
counter[i] = 0;
if( (pow(U1[i],2) + pow(U2[i],2)) <= 1.0 )
counter[i] = 1;
}
""")
n = 16*1024*1204
U1 = numpy.random.rand(n).astype('f')
U2 = numpy.random.rand(n).astype('f')
counter = numpy.zeros(n).astype('f')
start_time = time.time()
func = C_cuda_code.get_function("MC_Pi")
size_block = 1024
size_grid = int((n-1)/size_block + 1)
func(cuda.In(U1), cuda.In(U2), cuda.Out(counter), block=(size_block,1,1), grid=(size_grid,1))
counter = numpy.sum(counter)
print "PI_gpu = ", 4.0*counter/n
print "Time elapsed GPU: ", time.time() - start_time, "s"
start_time = time.time()
counter_cpu = numpy.sum( (numpy.power(U1,2) + numpy.power(U2,2)) <= 1.0 )
print "PI_cpu = ", 4.0*counter_cpu/n
print "Time elapsed CPU: ", time.time() - start_time, "s"
%load pycuda_montecarlo_pi_GPUArrays.py
import pycuda.driver as cuda
import pycuda.autoinit
import numpy
import time
import pycuda.gpuarray as gpuarray
import pycuda.curandom as curandom
n = 16*1024*1204
U1 = curandom.rand(n)
U2 = curandom.rand(n)
counter = gpuarray.zeros(n, dtype='f')
start_time = time.time()
counter = gpuarray.sum( (U1*U1 + U2*U2) <= 1.0 )
print "PI_gpu = ", 4.0*counter/n
print "Time elapsed GPUArrays: ", time.time() - start_time, "s"
# Sequential part
U1 = numpy.random.rand(n).astype('f')
U2 = numpy.random.rand(n).astype('f')
start_time = time.time()
counter_cpu = numpy.sum( (numpy.power(U1,2) + numpy.power(U2,2)) <= 1.0 )
print "PI_cpu = ", 4.0*counter_cpu/n
print "Time elapsed CPU: ", time.time() - start_time, "s"
%load pycuda_sumarrays.py
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import numpy
import time
C_cuda_code = SourceModule("""
__global__ void sum_arrays(float *a, float *b, float *c)
{
const int i = blockIdx.x*blockDim.x + threadIdx.x;
c[i] = a[i] + b[i];
}
""")
n = 16*1024*1024
a = numpy.random.randn(n).astype('f')
b = numpy.random.randn(n).astype('f')
c = numpy.zeros(n).astype('f')
a_gpu = cuda.mem_alloc(a.nbytes)
b_gpu = cuda.mem_alloc(b.nbytes)
c_gpu = cuda.mem_alloc(c.nbytes)
start_time = time.time()
cuda.memcpy_htod(a_gpu, a)
cuda.memcpy_htod(b_gpu, b)
func = C_cuda_code.get_function("sum_arrays")
size_block = 1024
size_grid = int((n-1)/size_block + 1)
func(a_gpu, b_gpu, c_gpu, block=(size_block,1,1), grid=(size_grid,1))
cuda.memcpy_dtoh(c, c_gpu)
print "Time elapsed GPU: ", time.time() - start_time, "s"
start_time = time.time()
c_cpu = a + b
print "Time elapsed CPU: ", time.time() - start_time, "s"
print numpy.alltrue(abs(c-c_cpu) < 1e-5)
%load_ext version_information
%version_information numpy, pycuda
Software | Version |
---|---|
Python | 2.7.9 64bit [GCC 4.4.7 20120313 (Red Hat 4.4.7-1)] |
IPython | 2.3.1 |
OS | Linux 3.16.0 28 generic x86_64 with debian jessie sid |
numpy | 1.9.1 |
pycuda | pycuda |
Wed Dec 17 13:18:01 2014 CET |
from IPython.core.display import HTML
def css_styling():
    """Return the contents of ``./styles/custom.css`` wrapped in an HTML object.

    The path is resolved relative to the current working directory.
    """
    # The context manager closes the file handle even if read() raises.
    with open("./styles/custom.css", "r") as css_file:
        styles = css_file.read()
    return HTML(styles)


css_styling()