# Sample sentence used by the word-counting examples in this file.
sentence = "This cat jumped over this other cat!"

def stem(word):
    """ Reduce a word to a primitive form

    The word is lower-cased, then punctuation characters are stripped
    from its right end and afterwards from its left end.  Characters in
    the interior of the word are left untouched.

    >>> stem("Hello!")
    'hello'
    """
    trailing = ",.!)-*_?:;$'-\""
    leading = "-*'\"(_$'"

    lowered = word.lower()
    without_tail = lowered.rstrip(trailing)
    return without_tail.lstrip(leading)

def wordcount(string):
    """ Count how often each stemmed word occurs in a string

    The string is split into whitespace-separated pieces, every piece is
    passed through ``stem`` and the results are tallied.

    Returns a dict mapping each stemmed word to its number of occurrences.
    """
    tally = {}
    for key in [stem(token) for token in string.split()]:
        # First sighting starts at 0, so every occurrence adds exactly one
        tally[key] = tally.get(key, 0) + 1
    return tally

# Count the words of the sample sentence with the hand-written function.
wordcount(sentence)

from toolz import frequencies

# Word count expressed with toolz: stem every piece of the sentence,
# then let frequencies tally the occurrences.
frequencies(map(stem, sentence.split()))

from toolz import pipe

# Simple example
def double(x):
    """ Return twice ``x``

    >>> double(3)
    6
    """
    twice = 2 * x
    return twice

# pipe threads the value 3 through double, double and str in that order,
# i.e. str(double(double(3))).
pipe(3, double, double, str)

from toolz.curried import map
    
# Word count as a pipeline: split the sentence, stem every piece, tally.
# map(stem) uses the curried map from toolz.curried, which yields a
# one-argument function when given only the function to apply.
pipe(sentence, str.split, map(stem), frequencies)

def wordcount(file):
    """ Count how often each stemmed word occurs across the lines of ``file``

    ``file`` is iterated line by line.  Each line is split into
    whitespace-separated pieces, every piece is passed through ``stem``
    and all results are tallied into a single dict.

    Returns a dict mapping each stemmed word to its number of occurrences.
    """
    tally = {}

    for line in file:
        # Stem the whole line first, then fold it into the running tally
        for key in [stem(token) for token in line.split()]:
            tally[key] = tally.get(key, 0) + 1

    return tally

# Count the words of the text file, streaming it line by line.
with open('data/tale-of-two-cities.txt') as f:
    for i in range(112):  # Burn first 112 lines - they include the Gutenberg header
        next(f)
    # f has already advanced past the burned lines, so only the rest is counted
    result = wordcount(f)

# Bare expression: shows the counts when run interactively
result
    

from toolz import concat
from toolz.curried import drop

# Word count of the file as one pipeline: open it, drop the first 112 lines,
# split each line, flatten the per-line pieces into one stream, stem, tally.
pipe('data/tale-of-two-cities.txt', open, drop(112), map(str.split), concat, map(stem), frequencies)

# Time the pipeline version (IPython timeit magic, not plain Python syntax)
timeit pipe('data/tale-of-two-cities.txt', open, drop(112), map(str.split), concat, map(stem), frequencies)

timeit with open('data/tale-of-two-cities.txt') as f: wordcount(f)