SkipList

An implementation and exploration of skiplists.

Code taken/refactored from John Shipman's excellent SkipList implementation: http://infohost.nmt.edu/tcc/help/lang/python/examples/pyskip/pyskip.pdf

In [36]:
import random
def random_level(max_level):
    num = random.randint(1, 2**max_level - 1)
    lognum = math.log(num, 2)
    level = int(floor(lognum))
    return max_level - level

print random_level(8)
print random_level(8)
print random_level(8)
print random_level(8)
print random_level(8)
print random_level(8)
3
1
1
4
3
3

In [37]:
class Node(object):
    def __init__(self, value, level=0):
        self.value = value
        self.next = [None] * level
    
    def __str__(self):
        return "Node(%s,%s)" % (self.value, len(self.next))
    __repr__ = __str__
    
class SkipList(object):
    def __init__(self, max_level=8):
        self.max_level = max_level
        n = Node(None, max_level)
        self.head = n
        self.verbose = False
    
    def update_list(self, value):
        update = [None] * (self.max_level)
        n = self.head
        
        self._n_traverse = 0
        
        level = self.max_level - 1
        while level >= 0:
            if self.verbose and \
                n.next[level] != None and n.next[level].value >= value:
                print 'DROP down from level', level + 1
            while n.next[level] != None and n.next[level].value < value:
                self._n_traverse += 1
                if self.verbose:
                    print 'AT level', level, 'value', n.next[level].value
                n = n.next[level]
            update[level] = n
            level -= 1

        return update
    
    def find(self, value, update=None):
        if update is None:
            update = self.update_list(value)
                        
        if len(update) > 0:
            candidate = update[0].next[0]
            if candidate != None and candidate.value == value:
                return candidate
        return None
    
    def insert_node(self, value, level=None):
        if level is None:
            level = random_level(self.max_level)
            
        node = Node(value, level)
        
        update = self.update_list(value)
        if self.find(value, update) == None:
            for i in range(level):
                node.next[i] = update[i].next[i]
                update[i].next[i] = node
    
def print_level(sl, level):
    print 'level %d:' % level,
    node = sl.head.next[level]
    while node:
        print node.value, '=>',
        node = node.next[level]
    print 'END'
        
In [46]:
# create and load a skiplist with max level of 4
x = SkipList(4)
for i in range(0, 20, 2):
    x.insert_node(i)
In [47]:
# print out the data structure
print_level(x, 0)
print_level(x, 1)
print_level(x, 2)
level 0: 0 => 2 => 4 => 6 => 8 => 10 => 12 => 14 => 16 => 18 => END
level 1: 2 => 6 => 10 => 12 => 18 => END
level 2: 10 => 12 => END

In [48]:
# verbalize the insertion process for '11'
x.verbose = True
print 'INSERT', 11
x.insert_node(11)
print 'DONE;', x._n_traverse, 'traversals'
INSERT 11
AT level 2 value 10
DROP down from level 2
DROP down from level 1
DONE; 1 traversals

In [49]:
# print out the updated data structure
print_level(x, 0)
print_level(x, 1)
print_level(x, 2)
level 0: 0 => 2 => 4 => 6 => 8 => 10 => 11 => 12 => 14 => 16 => 18 => END
level 1: 2 => 6 => 10 => 12 => 18 => END
level 2: 10 => 12 => END

In [53]:
# do a random simulation to evaluate how many traversals need to be done to reach
# the last element in the list
def skiplist_traverse_mc(max_level, max_count, n=100):
    z = []
    for _ in range(n):
        x = SkipList(max_level)
        for i in reversed(range(max_count)):
            x.insert_node(i)
        
        x.find(254)
        z.append(x._n_traverse)

    return z
In [51]:
avgs = []
for i in range(1, 10):
    z = skiplist_traverse_mc(i, 200)
    avgs.append((i, average(z)))

avgs = numpy.array(avgs)
In [52]:
# graph average to traverse to last of 200 elements
plot(avgs[:,0], avgs[:,1])
axis(ymin=0, xmin=0)
xlabel('skiplist max level')
ylabel('time to traverse to last of 200 elements')
Out[52]:
<matplotlib.text.Text at 0x10b832150>
In []: