# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline

# The wordcloud package used below comes from:
# https://github.com/amueller/word_cloud

import nltk
import matplotlib.pyplot as plt
import wordcloud

from nltk.corpus import stopwords

# English stopwords from NLTK's stopwords corpus, as a set.
english_stops = set(stopwords.words('english'))

# Bare expression so the notebook cell displays the set.
english_stops

# IPython shell capture: keep the output of `ls data/books`.
# The result is indexed like a list further down (booklist[1]).
booklist = !ls data/books

booklist

def read_file(name, path):
    """Return the entire contents of the file ``name`` found under ``path``.

    Args:
        name: File name, relative to ``path``.
        path: Directory that contains the file. A trailing separator is
            accepted but no longer required.

    Returns:
        Whatever ``handle.read()`` yields for the file opened in ``'r'`` mode.

    Raises:
        IOError / OSError: if the file cannot be opened.
    """
    import os

    # ``file()`` only exists in Python 2; ``open()`` works on 2 and 3.
    # os.path.join gives the same result as ``path + name`` when ``path``
    # already ends in a separator, and inserts one when it does not.
    with open(os.path.join(path, name), 'r') as handle:
        return handle.read()

# Load the text of the entry at index 1 of the listing (the second file).
raw = read_file(booklist[1], 'data/books/')

# Increase the default size of the figures that get drawn.
from pylab import rcParams
rcParams['figure.figsize'] = (15,15)

def make_word_cloud(text, stops=None,
                    font_path="/Users/lynn/Library/Fonts/Lato-Medium.ttf"):
    """Generate a word cloud from ``text`` and show it with matplotlib.

    Args:
        text: The raw text to build the cloud from.
        stops: Collection of words to leave out, handed to ``WordCloud`` as
            ``stopwords``. ``None`` is passed through unchanged.
        font_path: Path to the TrueType font used for rendering. The default
            is the original author's machine-specific path; pass a font that
            exists on your machine instead of editing this function.

    Returns:
        None. The image is drawn as a side effect via ``plt.show()``.
    """
    import wordcloud

    cloud = wordcloud.WordCloud(font_path=font_path, stopwords=stops)
    cloud = cloud.generate(text)
    plt.imshow(cloud)
    plt.axis("off")  # a word cloud has no meaningful axes
    plt.show()

# Draw a cloud for the loaded text; no stops are passed, so stops=None is used.
make_word_cloud(raw)

from IPython.html.widgets import interact, interactive, fixed
from IPython.html import widgets
from IPython.display import clear_output, display, HTML


def make_word_cloud_topics(files=None, path='data/books/'):
    """Interactively draw word clouds with English stops plus hand-added ones.

    Hands ``make_pic`` to ``interact`` with ``files`` and an initially empty
    ``stops`` string as the controls. Each time ``make_pic`` is called it
    reads the chosen file from ``path`` and draws its cloud, filtering the
    NLTK English stopwords plus the words currently in ``stops``.

    Args:
        files: The file names to choose from; passed to ``interact`` as-is.
        path: Directory prefix handed to ``read_file`` for the chosen file.

    Returns:
        None.
    """
    from nltk.corpus import stopwords

    english_stops = set(stopwords.words('english'))

    def make_pic(files, stops):
        """Draw the cloud for one file; ``stops`` is a space-separated string."""
        extra_stops = stops.split()
        # Build a fresh set on every call. Updating ``english_stops`` in place
        # would make every value ever typed (including half-typed words)
        # stick for all later redraws, even after it is deleted from the box.
        newstops = english_stops.union(extra_stops)
        text = read_file(files, path)
        make_word_cloud(text, stops=newstops)

    # Parameter names of make_pic must match these keyword names.
    interact(make_pic, files=files, stops='')

# Interactive version: type extra stopwords separated by spaces and the cloud
# is redrawn live. The regular English stopwords are already filtered out.
make_word_cloud_topics(files=booklist)

# IPython shell capture again, this time listing the stories directory.
stories = !ls data/stories/
stories

make_word_cloud_topics(files=stories, path='data/stories/')