In [7]:
from pybloom import BloomFilter
import os
import re
In [21]:
# Read every file in the "posts" directory into memory, keyed by file name.
# The `with` block closes each file handle as soon as its contents are read,
# instead of leaving the handles open until garbage collection.
posts = {}
for post_name in os.listdir("posts"):
    with open(os.path.join("posts", post_name)) as post_file:
        posts[post_name] = post_file.read()

# Lower-case each post and split it on runs of non-word characters, keeping
# only the set of distinct tokens per post. The raw string stops `\W` from
# being parsed as an (invalid) string escape sequence.
split_posts = {name: set(re.split(r"\W+", contents.lower())) for name, contents in posts.items()}
In [23]:
# Build one Bloom filter per post from that post's set of distinct tokens.
MIN_FILTER_CAPACITY = 1000   # capacity used for posts with a small vocabulary
FILTER_ERROR_RATE = 0.01     # target false-positive rate of each filter

filters = {}
for name, words in split_posts.items():
    # Size the filter to the post's vocabulary when it exceeds the minimum:
    # pybloom raises IndexError once more items are added than `capacity`,
    # so a fixed capacity of 1000 breaks on posts with more distinct tokens.
    post_filter = BloomFilter(
        capacity=max(MIN_FILTER_CAPACITY, len(words)),
        error_rate=FILTER_ERROR_RATE,
    )
    for word in words:
        post_filter.add(word)
    filters[name] = post_filter
In [28]:
def search(search_string):
    """Return the names of posts whose Bloom filter reports every query term.

    The query is lower-cased and split on runs of non-word characters, the
    same way the post contents are tokenised before being indexed, so
    matching is case-insensitive. Empty tokens (produced by leading or
    trailing separators in the query) are ignored.

    Because membership is tested against Bloom filters, a returned post may
    be a false positive; a post that really contains every term is never
    missed.

    Args:
        search_string: Free-text query, e.g. "python raspberry".

    Returns:
        A list of post names from the module-level `filters` mapping. The
        list is empty when the query contains no terms.
    """
    # Normalise exactly like the indexed text: lower-case, split on \W+.
    search_terms = [term for term in re.split(r"\W+", search_string.lower()) if term]
    if not search_terms:
        # all() over no terms is vacuously true and would match every post.
        return []
    return [
        name
        for name, post_filter in filters.items()
        if all(term in post_filter for term in search_terms)
    ]
In [29]:
# Example query: list the posts whose filters report both "python" and "raspberry".
search("python raspberry")
Out[29]:
['2013-06-19 - how-remote-control-rf-devices-raspberry-pi.md',
 '2013-06-09 - how-turn-your-raspberry-pi-infrared-remote-control.md',
 '2013-06-24 - writing-my-first-android-app-control-your-raspberr.md']