#!/usr/bin/env python
#
# This example is a quick and fun way to show an application of Naive Bayes (NB)
# and how it works with NLTK.
#
# pydata Tutorial March 18, 2013
# Thanks to StreamHacker a.k.a. Jacob Perkins
# Thanks to Prof. Todd Ebert
#

import nltk.classify.util
from nltk.classify import NaiveBayesClassifier
from nltk import tokenize

#
# Data
#
label_1 = "Bieber"
train_text_1 = """When I met you girl my heart went knock knock
Now them butterflies in my stomach won't stop stop
And even though it's a struggle love is all we got
So we goin keep keep climbin' to the mountain top

Your world is my world
And my fight is your fight
My breath is your breath
When you're hurt, I'm not right"""

test_text_1 = """You look so deep
You know that it humbles me
You're by my side and troubles -- they don't trouble me
Many have called but the chosen is you
Whatever you want shawty I'll give it to you"""

label_2 = "Obama"
# First try
train_text_2 = """Hi, everybody. In my State of the Union Address,
I talked about pizza. More specifically,
I talked about a pizza chain in Minneapolis – Punch Pizza – whose owner,
John Soranno, made the business decision to give his employees a raise to ten bucks an hour.

And while not all of us always see eye to eye politically, one thing we overwhelmingly agree on
is that nobody who works full-time should ever have to live in poverty. That’s why nearly three
in four Americans support raising the minimum wage. The problem is, Republicans in Congress don’t
support raising the minimum wage. Some even want to get rid of it entirely. In Oklahoma, for
example, the Republican governor just signed a law prohibiting cities from establishing their
own minimum wage."""

test_text_2 = """Republicans have voted more than 50 times to undermine or repeal health care
for millions of Americans. They should vote at least once to raise the minimum wage for
millions of working families."""

#
# For testing classification
#
classify_bieber = "It's like an angel came by and took me to heaven, 'cause when i stare in your eyes it couldn't be better"
classify_obama = "one thing we overwhelmingly agree on is that nobody who works full-time should ever have to live in poverty."
classify_other = "What do you call a lazy baby kangaroo? A pouch potato."

#
# Take a list of words and turn it into a Bag of Words
#
def bag_of_words(words):
    return dict([(word, True) for word in words])

#
# Program Starts Here
#

#
# Step 1: Feature Extraction
#
train_words_1 = tokenize.word_tokenize(train_text_1)
# Let's take a look at what tokenize does:
print(train_words_1)

train_words_2 = tokenize.word_tokenize(train_text_2)

train_bieber = [(bag_of_words(train_words_1), label_1)]
# Let's take a look at what bag_of_words does...
print(train_bieber)
# bag_of_words creates our Bernoulli (word-presence) vector, and the label we condition on
# is attached at the end of the tuple. We can thus use this for our probabilities.

train_obama = [(bag_of_words(train_words_2), label_2)]

test_words_1 = tokenize.word_tokenize(test_text_1)
test_words_2 = tokenize.word_tokenize(test_text_2)
test_bieber = [(bag_of_words(test_words_1), label_1)]
test_obama = [(bag_of_words(test_words_2), label_2)]
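#
# Side note (an added illustration, not from the original tutorial): each training
# item NLTK sees is a (featureset, label) tuple, and the featureset is just a dict
# mapping every word that appears to True. Duplicate words collapse to a single key,
# which is why this is a Bernoulli (presence/absence) model rather than a count model.
# The sentence below is a made-up example.
#
example_features = bag_of_words(tokenize.word_tokenize("baby baby oh"))
print(example_features)  # roughly {'baby': True, 'oh': True}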
# We give NLTK all of our training features, and all of our test features.
train_features = train_bieber + train_obama
test_features = test_bieber + test_obama

#
# Step 2: Train the classifier
#
classifier = NaiveBayesClassifier.train(train_features)

# We use the test set to measure accuracy (uncomment to check):
# print('Accuracy: %.2f' % nltk.classify.util.accuracy(classifier, test_features))

#
# Step 3: Test Classification
#
print(classifier.classify(bag_of_words(tokenize.word_tokenize(classify_bieber))))
print(classifier.classify(bag_of_words(tokenize.word_tokenize(classify_obama))))
print(classifier.classify(bag_of_words(tokenize.word_tokenize(classify_other))))

#
# Testing with random words
#
def BiebOrBam(wordz):
    print(classifier.classify(bag_of_words(tokenize.word_tokenize(wordz))))

#
# Testing it out
#
# BiebOrBam('Peace out, ladies')
BiebOrBam('I love you')
BiebOrBam('let\'s dance')
BiebOrBam('Let\'s dance poverty away')
BiebOrBam('Let\'s dance poverty away for good America')
BiebOrBam('Let\'s dance poverty away for good America. I love you')

#
# The End
#
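#
# Extra (a hedged sketch, not part of the original tutorial): NLTK's
# NaiveBayesClassifier can also explain itself. show_most_informative_features()
# lists the words whose presence most strongly separates the two labels, and
# prob_classify() returns the per-label probabilities instead of just the winner.
# The sentence passed in below is the same toy input used above.
#
classifier.show_most_informative_features(10)

probs = classifier.prob_classify(bag_of_words(tokenize.word_tokenize('I love you')))
for label in probs.samples():
    print('%s: %.3f' % (label, probs.prob(label)))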