from ipythonblocks import BlockGrid grid = BlockGrid(10,10, fill=(0,0,128)) x = grid.shape[0] y = grid.shape[1] for block in grid: r = block.row * 255 / float(x) g = block.col * 255 / float(y) block.red = r block.green = g grid import math class Hash(object): def __init__(self, size): self.size = size self.bits = [0]*size def add(self, num): num = num % self.size self.bits[num] = 1 def get(self, num): num = num % self.size return self.bits[num] class BloomFilter(object): def __init__(self, *sizes): self.hashes = [ Hash(size) for size in sizes ] def add(self, num): for h in self.hashes: h.add(num) def get(self, num): for h in self.hashes: if not h.get(num): return 0 return 1 def fp(self): total = 0. for h in self.hashes: occupancy = sum(h.bits) f = occupancy / float(h.size) total += math.log(f, 2) return 2**total def empirical_fp(self, actual, max): found_true = 0 found_false = 0 for i in range(max): if self.get(i): if i in actual: found_true += 1 else: found_false += 1 return found_false / float(max) def show(self): rows = len(self.hashes) cols = max([ h.size for h in self.hashes ]) grid = BlockGrid(cols, rows, fill=(0,0,0)) for i, h in enumerate(self.hashes): for pos in range(h.size, cols): grid[i, pos] = (255, 255, 255) for j, bit in enumerate(h.bits): if bit: grid[i, j] = (255, 0, 0) return grid.show() x = BloomFilter(5, 7, 11) x.show() x.add(253) x.show() print x.get(253) ### x.add(8132) x.show() print x.get(8132) import random, time x = BloomFilter(5, 7, 11) actual = set() for _ in range(10): num = random.randint(0, 255) actual.add(num) x.add(num) x.show() theory, empirical = x.fp(), x.empirical_fp(actual, 255) print 'inserting', num print 'predicted FP:', theory, 'diff from actual:', abs(theory-empirical) time.sleep(1) # here, we add '3' and test for '3 + 5*7*11', or 388. x = BloomFilter(5, 7, 11) x.add(3) x.show() print x.get(3), x.get(3 + (5*7*11)) print "oh noes! 388 is falsely said to be present in the data set!"