A Simple program with Pyopencl
This post talks about a simple map program written using PyOpenCL. It compares the running time of a normal Python map with that of an OpenCL kernel. Let us start by importing the OpenCL library and getting all the platforms in the current system.
In [3]:
import pyopencl as cl
# Ask PyOpenCL for the list of OpenCL platforms on this machine and display it.
# NOTE(review): the name `plt` is rebound to matplotlib.pyplot by an import in a
# later cell of this notebook, so cells that index `plt` as a platform list only
# work if they run before that import.
plt = cl.get_platforms()
plt
Out[3]:
In [4]:
# Devices exposed by the first platform in the list; displayed for inspection.
devices = plt[0].get_devices()
devices
Out[4]:
In [14]:
from IPython.display import Image
# Raw string keeps the backslash of the Windows path literal: '\d' is not a
# valid escape sequence and newer Python versions warn about it.
# NOTE(review): absolute, machine-specific path -- the screenshot must exist on
# drive e: for this cell to run.
Image(filename=r'e:\devicemanager.png')
Out[14]:
In [6]:
# Create an OpenCL context containing only the first device found above.
ctx = cl.Context([devices[0]])
# Query the context for its device list and display it as a sanity check.
ctx.get_info(cl.context_info.DEVICES)
Out[6]:
In [7]:
import numpy as np
# Host-side input: the values 0 .. 99999 stored as 32-bit floats.
in_vector = np.arange(100000, dtype=np.float32)
# Uninitialised host array with the same shape and dtype, to receive the result.
out_vector = np.empty(in_vector.shape, dtype=in_vector.dtype)
In [8]:
mf = cl.mem_flags
# Buffer created READ_ONLY and initialised by copying in_vector from the host.
in_buffer = cl.Buffer(
    ctx,
    mf.COPY_HOST_PTR | mf.READ_ONLY,
    hostbuf=in_vector,
)
# Buffer created WRITE_ONLY, sized in bytes to hold out_vector.
out_buffer = cl.Buffer(
    ctx,
    mf.WRITE_ONLY,
    out_vector.nbytes,
)
In [9]:
# OpenCL C source of the kernel: each work-item reads its global id, squares
# the matching element of in_vector and stores it at the same index in
# out_vector. Adjacent literals are concatenated into one single-line string.
prg_str = (
    " "
    "__kernel void sq_input(__global const float *in_vector, "
    "__global float *out_vector) "
    "{ "
    "int gid = get_global_id(0); "
    "out_vector[gid] = in_vector[gid] * in_vector[gid]; "
    "} "
)
In [10]:
# Create a program object from the kernel source string for context `ctx` and
# build it; the kernel is then reachable as kernel_prg.sq_input.
kernel_prg = cl.Program(ctx,prg_str).build()
In [11]:
# Command queue on which the kernel launch and the read-back are submitted.
queue = cl.CommandQueue(ctx)
# Launch sq_input over the full shape of in_vector.
kernel_prg.sq_input(
    queue,
    in_vector.shape,  # global work size
    None,             # no explicit local work size
    in_buffer,
    out_buffer,
)
# Copy the device result back into the host array; the returned value is the
# cell's displayed output.
cl.enqueue_copy(queue, out_vector, out_buffer)
Out[11]:
In [12]:
# Display the host input array for comparison with the result below.
in_vector
Out[12]:
In [13]:
# Display the host output array after the copy from the device buffer.
out_vector
Out[13]:
Timeit to check performance
In [16]:
%timeit kernel_prg.sq_input(queue, in_vector.shape, None, in_buffer, out_buffer)
In [17]:
%timeit map(lambda x: x*x,in_vector)
In [4]:
%matplotlib inline
import matplotlib.pyplot as plt
In [7]:
# Hand-entered benchmark results, one entry per problem size.
# Problem sizes: 10**2 .. 10**7.
N = np.array([10 ** exponent for exponent in range(2, 8)])
# Timings for the Python map, index-aligned with N.
cpu_time = np.array([
    35.4,
    337,
    3500,
    37100,
    386000,
    4080000,
])
# Timings for the OpenCL kernel call, index-aligned with N.
gpu_time = np.array([
    39.2,
    44.7,
    42,
    41.1,
    670,
    7590,
])
In [23]:
# Plot both timing series against problem size, taking the natural log of
# each axis so the wide range of values fits on one figure.
plt.title("CPU vs GPU Time in log scale")
plt.xlabel("Log(N)")
# Label typo fixed ("n micro seconds" -> "in microseconds").
plt.ylabel("Log(Execution Time in microseconds)")
plt.plot(np.log(N), np.log(cpu_time), label='CPU Time')
plt.plot(np.log(N), np.log(gpu_time), label='GPU Time')
plt.legend(loc='best')
Out[23]: