See http://ivory.idyll.org/blog/2013-pycon-awesome-big-data-algorithms-talk.html

# SkipList

An implementation and exploration of skiplists.

Code taken/refactored from John Shipman's excellent SkipList implementation: http://infohost.nmt.edu/tcc/help/lang/python/examples/pyskip/pyskip.pdf

In [36]:
import random
def random_level(max_level):
num = random.randint(1, 2**max_level - 1)
lognum = math.log(num, 2)
level = int(floor(lognum))
return max_level - level

print random_level(8)
print random_level(8)
print random_level(8)
print random_level(8)
print random_level(8)
print random_level(8)

3
1
1
4
3
3

In [37]:
class Node(object):
def __init__(self, value, level=0):
self.value = value
self.next = [None] * level

def __str__(self):
return "Node(%s,%s)" % (self.value, len(self.next))
__repr__ = __str__

class SkipList(object):
def __init__(self, max_level=8):
self.max_level = max_level
n = Node(None, max_level)
self.verbose = False

def update_list(self, value):
update = [None] * (self.max_level)

self._n_traverse = 0

level = self.max_level - 1
while level >= 0:
if self.verbose and \
n.next[level] != None and n.next[level].value >= value:
print 'DROP down from level', level + 1
while n.next[level] != None and n.next[level].value < value:
self._n_traverse += 1
if self.verbose:
print 'AT level', level, 'value', n.next[level].value
n = n.next[level]
update[level] = n
level -= 1

return update

def find(self, value, update=None):
if update is None:
update = self.update_list(value)

if len(update) > 0:
candidate = update[0].next[0]
if candidate != None and candidate.value == value:
return candidate
return None

def insert_node(self, value, level=None):
if level is None:
level = random_level(self.max_level)

node = Node(value, level)

update = self.update_list(value)
if self.find(value, update) == None:
for i in range(level):
node.next[i] = update[i].next[i]
update[i].next[i] = node

def print_level(sl, level):
print 'level %d:' % level,
while node:
print node.value, '=>',
node = node.next[level]
print 'END'


In [46]:
# create and load a skiplist with max level of 4
x = SkipList(4)
for i in range(0, 20, 2):
x.insert_node(i)

In [47]:
# print out the data structure
print_level(x, 0)
print_level(x, 1)
print_level(x, 2)

level 0: 0 => 2 => 4 => 6 => 8 => 10 => 12 => 14 => 16 => 18 => END
level 1: 2 => 6 => 10 => 12 => 18 => END
level 2: 10 => 12 => END

In [48]:
# verbalize the insertion process for '11'
x.verbose = True
print 'INSERT', 11
x.insert_node(11)
print 'DONE;', x._n_traverse, 'traversals'

INSERT 11
AT level 2 value 10
DROP down from level 2
DROP down from level 1
DONE; 1 traversals

In [49]:
# print out the updated data structure
print_level(x, 0)
print_level(x, 1)
print_level(x, 2)

level 0: 0 => 2 => 4 => 6 => 8 => 10 => 11 => 12 => 14 => 16 => 18 => END
level 1: 2 => 6 => 10 => 12 => 18 => END
level 2: 10 => 12 => END

In [53]:
# do a random simulation to evaluate how many traversals need to be done to reach
# the last element in the list
def skiplist_traverse_mc(max_level, max_count, n=100):
z = []
for _ in range(n):
x = SkipList(max_level)
for i in reversed(range(max_count)):
x.insert_node(i)

x.find(254)
z.append(x._n_traverse)

return z

In [51]:
avgs = []
for i in range(1, 10):
z = skiplist_traverse_mc(i, 200)
avgs.append((i, average(z)))

avgs = numpy.array(avgs)

In [52]:
# graph average to traverse to last of 200 elements
plot(avgs[:,0], avgs[:,1])
axis(ymin=0, xmin=0)
xlabel('skiplist max level')
ylabel('time to traverse to last of 200 elements')

Out[52]:
<matplotlib.text.Text at 0x10b832150>
In []: