%matplotlib inline # Wordcloud stuff comes from here: # https://github.com/amueller/word_cloud import nltk import matplotlib.pyplot as plt import wordcloud from nltk.corpus import stopwords english_stops = set(stopwords.words('english')) english_stops booklist = !ls data/books booklist def read_file(name, path): with file(path + name, 'r') as handle: return handle.read() raw = read_file(booklist[1], 'data/books/') # increasing the size of the figure drawn from pylab import rcParams rcParams['figure.figsize'] = (15,15) def make_word_cloud(text, stops=None): import wordcloud # you have to figure out a font path for your machine (sigh) myWordcloud = wordcloud.WordCloud(font_path="/Users/lynn/Library/Fonts/Lato-Medium.ttf", stopwords=stops).generate(text) plt.imshow(myWordcloud) plt.axis("off") plt.show() make_word_cloud(raw) from IPython.html.widgets import interact, interactive, fixed from IPython.html import widgets from IPython.display import clear_output, display, HTML def make_word_cloud_topics(files=None, path='data/books/'): """ This variant will use english stops plus any you add to it by hand.""" from nltk.corpus import stopwords english_stops = set(stopwords.words('english')) def make_pic(files, stops): stops = stops.split() # it's a string of words from your input, spaced newstops = english_stops newstops.update(stops) text = read_file(files, path) make_word_cloud(text, stops=newstops) interact(make_pic, files=files, stops='') # this lets you add stopwords separated by spaces, live... regular english stops are already filtered out. make_word_cloud_topics(files=booklist) stories = !ls data/stories/ stories make_word_cloud_topics(files=stories, path='data/stories/')