Benchmarking a few sorting algorithms using ipython's timeit
function, with pandas
and ggplot
libraries for result presentation.
Tests are run on reverse-ordered integer lists of increasing size.
The following are tested:
Algorithms are coded as described in this awesome interactive python course.
import timeit
from ggplot import *
import pandas as pd
from math import log10
def bubble(alist):
for passnum in range(len(alist)-1,0,-1):
for i in range(passnum):
if alist[i] > alist[i+1]:
alist[i],alist[i+1] = alist[i+1],alist[i]
def insertion(alist):
for index in range(1,len(alist)):
currentvalue = alist[index]
position = index
while position>0 and alist[position-1]>currentvalue:
alist[position] = alist[position-1]
position -= 1
alist[position] = currentvalue
def selection(alist):
for fillslot in range(len(alist)-1,0,-1):
positionOfMax=0
for location in range(1,fillslot+1):
if alist[location]>alist[positionOfMax]:
positionOfMax = location
alist[fillslot],alist[positionOfMax] = alist[positionOfMax],alist[fillslot]
def shell(alist):
sublistcount = len(alist)//2
while sublistcount > 0:
for startposition in range(sublistcount):
gapInsertionSort(alist,startposition,sublistcount)
sublistcount = sublistcount // 2
def gapInsertionSort(alist, start, gap):
for i in range(start+gap, len(alist), gap):
currentvalue = alist[i]
position = i
while position >= gap and alist[position-gap] > currentvalue:
alist[position] = alist[position-gap]
position = position-gap
alist[position] = currentvalue
def merge(alist):
if len(alist)>1:
mid = len(alist)//2
lefthalf = alist[:mid]
righthalf = alist[mid:]
merge(lefthalf)
merge(righthalf)
i,j,k = 0,0,0
while i<len(lefthalf) and j<len(righthalf):
if lefthalf[i]<righthalf[j]:
alist[k]=lefthalf[i]
i=i+1
else:
alist[k]=righthalf[j]
j=j+1
k=k+1
while i<len(lefthalf):
alist[k]=lefthalf[i]
i=i+1
k=k+1
while j<len(righthalf):
alist[k]=righthalf[j]
j=j+1
k=k+1
def quick(alist):
quickSortHelper(alist,0,len(alist)-1)
def quickSortHelper(alist,first,last):
if first<last:
splitpoint = partition(alist,first,last)
quickSortHelper(alist,first,splitpoint-1)
quickSortHelper(alist,splitpoint+1,last)
def partition(alist,first,last):
pivotvalue = alist[first]
leftmark = first+1
rightmark = last
done = False
while not done:
while leftmark <= rightmark and alist[leftmark] <= pivotvalue:
leftmark = leftmark + 1
while alist[rightmark] >= pivotvalue and rightmark >= leftmark:
rightmark = rightmark -1
if rightmark < leftmark:
done = True
else:
alist[leftmark],alist[rightmark]=alist[rightmark],alist[leftmark]
alist[first],alist[rightmark]=alist[rightmark],alist[first]
return rightmark
algs = ['bubble', 'insertion', 'selection', 'shell','merge','quick']
times = {x:[] for x in algs}
for f in algs:
int_lists = [range(50*y,0,-1) for y in range(1,11)] #set up reverse-sorted lists of increasing size
int_lists_lengths = [len(x) for x in int_lists]
for li in int_lists:
t = timeit.Timer('%s(li)' %f, 'from __main__ import %s, li' %f).repeat(repeat=3, number=100)
t_max = max(t)
t_max_transform = log10(1000*t_max) #take log10 of the milliseconds to visually separate the plotted curves
times[f].append(t_max_transform)
times_frame = pd.DataFrame(times)
times_frame.insert(0,'list size',int_lists_lengths)
print times_frame
list size bubble insertion merge quick selection shell 0 50 1.007534 -0.117510 0.928302 1.078277 0.929619 0.686821 1 100 1.419541 0.240306 1.283621 1.611787 1.480986 1.135608 2 150 1.754876 0.484754 1.486118 1.944468 1.890109 1.257771 3 200 2.000573 0.656185 1.615793 2.180284 2.062390 1.384195 4 250 2.195174 0.806802 1.724580 2.378709 2.250047 1.487718 5 300 2.363883 0.931466 1.820003 2.537163 2.407041 1.608034 6 350 2.516701 1.047428 1.892017 2.655110 2.538953 1.682586 7 400 2.631277 1.139531 2.098973 2.771925 2.663629 1.736340 8 450 2.728985 1.224326 2.006796 2.869309 2.765640 1.788239 9 500 2.813148 1.302503 2.061524 2.973612 2.859182 1.896802
a = ggplot(times_frame, aes("list size","bubble"))+geom_point(color='red') + \
geom_point(aes("list size","insertion"), color = 'orange') + \
geom_point(aes("list size","selection"), color = 'yellow') + \
geom_point(aes("list size", "shell"), color = 'green') + \
geom_point(aes("list size", "merge"), color = 'blue') + \
geom_point(aes("list size", "quick"), color = 'violet') + \
ggtitle("sorting benchmarks") + xlab("list size") + ylab("time (log10(msec))") + \
scale_x_continuous(breaks=int_lists_lengths)
a
## plotly plot:
# import plotly.plotly as py
# import plotly.tools as tls
# from plotly.graph_objs import *
# py.sign_in("IPython.Demo", "1fw3zw2o13")
# fig = a.draw()
# py.iplot_mpl(fig)
<ggplot: (6623221)>
Insertion sort looks to be the fastest. (the ggplot python library does not yet properly support legends, as of 09/2014).