%load_ext watermark
%watermark -d -m -v -p cython
[More information](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/ipython_magic/watermark.ipynb) about the `watermark` magic command extension.
First, we will write a simple implementation of the bubble sort algorithm in regular (C)Python
def python_bubblesort(a_list):
""" Bubblesort in Python for list objects. """
length = len(a_list)
swapped = 1
for i in range(0, length):
if swapped:
swapped = 0
for ele in range(0, length-i-1):
if a_list[ele] > a_list[ele + 1]:
temp = a_list[ele + 1]
a_list[ele + 1] = a_list[ele]
a_list[ele] = temp
swapped = 1
return a_list
python_bubblesort([6,3,1,5,6])
Next, we will speed things up a little bit via Cython's C-extensions for Python. Cython is basically a hybrid between C and Python and can be pictured as compiled Python code with type declarations.
Since we are working in an IPython notebook here, we can make use of the very convenient IPython magic: It will take care of the conversion to C code, the compilation, and eventually the loading of the function.
%load_ext cythonmagic
First, we will take the initial Python code as is and use Cython for the compilation. Cython is capable of auto-guessing types, however, we can make our code way more efficient by adding static types.
%%cython
def cython_bubblesort_untyped(a_list):
""" Bubblesort in Python for list objects. """
length = len(a_list)
swapped = 1
for i in range(0, length):
if swapped:
swapped = 0
for ele in range(0, length-i-1):
if a_list[ele] > a_list[ele + 1]:
temp = a_list[ele + 1]
a_list[ele + 1] = a_list[ele]
a_list[ele] = temp
swapped = 1
return a_list
%%cython
import numpy as np
cimport numpy as np
cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef cython_bubblesort_typed(np.ndarray[long, ndim=1] inp_ary):
""" The Cython implementation of Bubblesort with NumPy memoryview."""
cdef unsigned long length, i, swapped, ele, temp
cdef long[:] np_ary = inp_ary
length = np_ary.shape[0]
swapped = 1
for i in xrange(0, length):
if swapped:
swapped = 0
for ele in xrange(0, length-i-1):
if np_ary[ele] > np_ary[ele + 1]:
temp = np_ary[ele + 1]
np_ary[ele + 1] = np_ary[ele]
np_ary[ele] = temp
swapped = 1
return inp_ary
Below, we will do a quick speed comparison of our 3 implementations of the bubble sort algorithm by sorting a list (or numpy array) of 1000 random digits. Here, we have to make copies of the lists/numpy arrays, since our bubble sort implementation is sorting in place.
import random
import numpy as np
import copy
list_a = [random.randint(0,1000) for num in range(1000)]
list_b = copy.deepcopy(list_a)
ary_a = np.asarray(list_a)
ary_b = copy.deepcopy(ary_a)
ary_c = copy.deepcopy(ary_a)
import timeit
times = []
times.append(min(timeit.Timer('python_bubblesort(list_a)',
'from __main__ import python_bubblesort, list_a').repeat(repeat=3, number=1000)))
times.append(min(timeit.Timer('python_bubblesort(ary_a)',
'from __main__ import python_bubblesort, ary_a').repeat(repeat=3, number=1000)))
times.append(min(timeit.Timer('cython_bubblesort_untyped(list_b)',
'from __main__ import cython_bubblesort_untyped, list_b').repeat(repeat=3, number=1000)))
times.append(min(timeit.Timer('cython_bubblesort_untyped(ary_b)',
'from __main__ import cython_bubblesort_untyped, ary_b').repeat(repeat=3, number=1000)))
times.append(min(timeit.Timer('cython_bubblesort_typed(ary_c)',
'from __main__ import cython_bubblesort_typed, ary_c').repeat(repeat=3, number=1000)))
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
bar_labels = ('(C)Python on list',
'(C)Python on numpy array',
'untyped Cython on list',
'untyped Cython on numpy array',
'typed Cython with memoryview on numpy array')
fig = plt.figure(figsize=(10,8))
# plot bars
y_pos = np.arange(len(times))
plt.yticks(y_pos, bar_labels, fontsize=14)
bars = plt.barh(y_pos, times,
align='center', alpha=0.4, color='g')
# annotation and labels
for b,d in zip(bars, times):
plt.text(max(times)+0.1, b.get_y() + b.get_height()/2.5,
'{:.2} ms'.format(d),
ha='center', va='bottom', fontsize=12)
t = plt.title('Bubblesort on 1000 random integers', fontsize=18)
plt.ylim([-1,len(times)+0.5])
plt.vlines(min(times), -1, len(times)+0.5, linestyles='dashed')
plt.grid()
plt.show()
As we can see from the results above, we are already able to make our Python code run almost as twice as fast if we compile it via Cython (Python on list: 332 µs, untyped Cython on list: 183 µs).
However, although it is more "work" to adjust the Python code, the "typed Cython with memoryview on numpy array" is significantly as expected.
IPython's notebook is really great for explanatory analysis and documentation, but what if we want to compile our Python code via Cython without letting IPython's magic doing all the work?
These are the steps you would need.
%%file cython_bubblesort_nomagic.pyx
cimport numpy as np
cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef cython_bubblesort_nomagic(np.ndarray[long, ndim=1] inp_ary):
""" The Cython implementation of Bubblesort with NumPy memoryview."""
cdef unsigned long length, i, swapped, ele, temp
cdef long[:] np_ary = inp_ary
length = np_ary.shape[0]
swapped = 1
for i in xrange(0, length):
if swapped:
swapped = 0
for ele in xrange(0, length-i-1):
if np_ary[ele] > np_ary[ele + 1]:
temp = np_ary[ele + 1]
np_ary[ele + 1] = np_ary[ele]
np_ary[ele] = temp
swapped = 1
return inp_ary
%%file setup.py
import numpy as np
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
setup(
cmdclass = {'build_ext': build_ext},
ext_modules = [
Extension("cython_bubblesort_nomagic",
["cython_bubblesort_nomagic.pyx"],
include_dirs=[np.get_include()])
]
)
!python3 setup.py build_ext --inplace
import cython_bubblesort_nomagic
cython_bubblesort_nomagic.cython_bubblesort_nomagic(np.array([4,6,2,1,6]))
from cython_bubblesort_nomagic import cython_bubblesort_nomagic
list_a = [random.randint(0,100000) for num in range(100000)]
ary_a = np.asarray(list_a)
ary_b = np.asarray(list_a)
times = []
times.append(min(timeit.Timer('cython_bubblesort_typed(ary_a)',
'from __main__ import cython_bubblesort_typed, ary_a').repeat(repeat=3, number=1000)))
times.append(min(timeit.Timer('cython_bubblesort_nomagic(ary_b)',
'from __main__ import cython_bubblesort_nomagic, ary_b').repeat(repeat=3, number=1000)))
from matplotlib import pyplot as plt
import numpy as np
bar_labels = ('Cython with IPython magic',
'Cython without IPython magic')
fig = plt.figure(figsize=(10,8))
# plot bars
y_pos = np.arange(len(times))
plt.yticks(y_pos, bar_labels, fontsize=14)
bars = plt.barh(y_pos, times,
align='center', alpha=0.4, color='g')
# annotation and labels
for b,d in zip(bars, times):
plt.text(max(times)+0.02, b.get_y() + b.get_height()/2.5,
'{:.2} ms'.format(d),
ha='center', va='bottom', fontsize=12)
t = plt.title('Bubblesort on 100,000 random integers', fontsize=18)
plt.ylim([-1,len(times)+0.5])
plt.vlines(min(times), -1, len(times)+0.5, linestyles='dashed')
plt.grid()
plt.show()