In [ ]:
 
In [ ]:
 
In [19]:
import numpy as np
import pandas as pd
# Read the tab-separated Kaggle sentiment training file into a DataFrame.
# pandas is imported under the alias `pd`, so the reader must be reached via `pd`
# (no name `p` is defined in this notebook, which breaks a fresh-kernel run).
train = pd.read_csv("kaggle-sentiment/train.tsv", delimiter='\t')
# Pull the label and text columns out as NumPy arrays; runPipeline() reads
# these two module-level names when it calls cross_val_score.
y = train['Sentiment'].values
X = train['Phrase'].values
In [20]:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import BernoulliNB, MultinomialNB

def runPipeline(classifier, boolean, cv):
    """Cross-validate a CountVectorizer + classifier pipeline and print a summary.

    The pipeline is scored with ``cross_val_score`` on the notebook-level
    arrays ``X`` (phrases) and ``y`` (sentiment labels). Nothing is returned;
    the mean of the per-fold scores is printed on a single line together with
    the settings used for the run.

    Parameters
    ----------
    classifier
        Estimator used as the final ``'nb'`` step of the pipeline.
    boolean
        Forwarded to ``CountVectorizer(binary=...)``; only its first printed
        character (``T``/``F`` for a bool) appears in the summary line.
    cv
        Forwarded unchanged to ``cross_val_score(cv=...)``.
    """
    vectorizer = CountVectorizer(encoding='latin-1', binary=boolean)
    steps = [('vect', vectorizer), ('nb', classifier)]
    fold_scores = cross_val_score(Pipeline(steps), X, y, cv=cv)

    # Plain Python mean over the per-fold scores.
    mean_score = sum(fold_scores) / len(fold_scores)

    # Short labels for the report: first letter of the flag, and the estimator's
    # repr truncated at the first '(' (e.g. "MultinomialNB").
    binary_flag = str(boolean)[0]
    classifier_label = str(classifier).partition('(')[0]

    report_template = "{} | B? {} | CV: {} | Classifier: {}"
    print(report_template.format(mean_score, binary_flag, cv, classifier_label))
    
# Each entry is (classifier class, binary flag for CountVectorizer, cv value).
# A fresh estimator instance is created for every run, in the listed order.
nb_runs = [
    (BernoulliNB, False, 2),
    (BernoulliNB, False, 3),
    (MultinomialNB, False, 2),
    (MultinomialNB, False, 3),
    (MultinomialNB, True, 2),
    (MultinomialNB, True, 3),
]
for nb_class, use_binary, n_folds in nb_runs:
    runPipeline(nb_class(), use_binary, n_folds)
0.5529026288030471 | B? F | CV: 2 | Classifier: BernoulliNB
0.5531524365695574 | B? F | CV: 3 | Classifier: BernoulliNB
0.5592720169584305 | B? F | CV: 2 | Classifier: MultinomialNB
0.5595474569680894 | B? F | CV: 3 | Classifier: MultinomialNB
0.5596116298457374 | B? T | CV: 2 | Classifier: MultinomialNB
0.5601369637205256 | B? T | CV: 3 | Classifier: MultinomialNB
In [ ]:
 
In [21]:
# Display the label array extracted from train['Sentiment'] as a quick check.
y
Out[21]:
array([1, 2, 2, ..., 3, 2, 2])
In [22]:
# Read 'hw6_data_sentiment.csv' (path relative to the working directory) into a DataFrame.
df = pd.read_csv('hw6_data_sentiment.csv')
In [23]:
# Display the loaded DataFrame to inspect its rows and columns.
df
Out[23]:
0 PoN tokens num_tokens sentences num_sentences no_sw num_no_sw topwords_unfil topwords_fil ... v_pos_fd bow bow_nosw diy_cleaner pruned nltk_negs unigram_feats bigram_feats bigram_feats_neg no_shared_words
0 I went to XYZ restaurant last week and I was v... N ['i', 'went', 'to', 'xyz', 'restaurant', 'last... 50 ['I went to XYZ restaurant last week and I was... 3 ['went', 'xyz', 'restaurant', 'last', 'week', ... 25 [('was', 4), ('to', 3), ('i', 2), ('and', 2), ... [('went', 1), ('xyz', 1), ('restaurant', 1), (... ... 0.186 Counter({'was': 4, 'to': 3, 'i': 2, 'and': 2, ... Counter({'went': 1, 'xyz': 1, 'restaurant': 1,... i went to xyz restaurant last week and i was v... went restaurant last week very disappointed. f... ['i', 'went', 'to', 'xyz', 'restaurant', 'last... ['was', 'to', 'i', 'and', 'the', 'a_NEG', 'for... ['i_went', 'went_to', 'to_xyz', 'xyz_restauran... ['i_went', 'went_to', 'to_xyz', 'xyz_restauran... ['i', 'to', 'xyz', 'week', 'and', 'i', 'was', ...
1 In each of the diner dish there are at least o... N ['in', 'each', 'of', 'the', 'diner', 'dish', '... 78 ['In each of the diner dish there are at least... 4 ['diner', 'dish', 'least', 'one', 'fly', 'wait... 31 [('the', 6), ('in', 4), ('to', 4), ('of', 3), ... [('want', 3), ('dish', 2), ('diner', 1), ('lea... ... 0.042 Counter({'the': 6, 'in': 4, 'to': 4, 'of': 3, ... Counter({'want': 3, 'dish': 2, 'diner': 1, 'le... in each of the diner dish there are at least o... each diner dish there least waiting hour dish ... ['in', 'each', 'of', 'the', 'diner', 'dish', '... ['to_NEG', 'the', 'want_NEG', 'the_NEG', 'in',... ['in_each', 'each_of', 'of_the', 'the_diner', ... ['in_each', 'each_of', 'of_the', 'the_diner', ... ['in', 'of', 'the', 'diner', 'are', 'at', 'lea...
2 This is the last place you would want to dine ... N ['this', 'is', 'the', 'last', 'place', 'you', ... 151 ['This is the last place you would want to din... 7 ['last', 'place', 'would', 'want', 'dine', 'pr... 61 [('to', 10), ('the', 9), ('and', 7), ('we', 5)... [('minutes', 3), ('place', 2), ('price', 2), (... ... 0.171 Counter({'to': 10, 'the': 9, 'and': 7, 'we': 5... Counter({'minutes': 3, 'place': 2, 'price': 2,... this is the last place you would want to dine ... this last place would want dine price that exp... ['this', 'is', 'the', 'last', 'place', 'you', ... ['to_NEG', 'the_NEG', 'and_NEG', 'we_NEG', 'ha... ['this_is', 'is_the', 'the_last', 'last_place'... ['this_is', 'is_the', 'the_last', 'last_place'... ['is', 'the', 'you', 'to', 'dine', 'at', 'the'...
3 I went to this restaurant where I had ordered ... N ['i', 'went', 'to', 'this', 'restaurant', 'whe... 75 ['I went to this restaurant where I had ordere... 6 ['went', 'restaurant', 'ordered', 'complimenta... 33 [('i', 6), ('the', 6), ('to', 3), ('for', 3), ... [('salad', 3), ('restaurant', 2), ('waiter', 2... ... 0.162 Counter({'i': 6, 'the': 6, 'to': 3, 'for': 3, ... Counter({'salad': 3, 'restaurant': 2, 'waiter'... i went to this restaurant where i had ordered ... went this restaurant where ordered complimenta... ['i', 'went', 'to', 'this', 'restaurant', 'whe... ['the', 'i', 'salad', 'had', 'for', 'waiter', ... ['i_went', 'went_to', 'to_this', 'this_restaur... ['i_went', 'went_to', 'to_this', 'this_restaur... ['i', 'to', 'i', 'had', 'for', 'the', 'complim...
4 I went there with two friends at 6pm. Long que... N ['i', 'went', 'there', 'with', 'two', 'friends... 73 ['I went there with two friends at 6pm.', 'Lon... 10 ['went', 'two', 'friends', 'long', 'queue', 'd... 38 [('there', 3), ('but', 3), ('it', 3), ('a', 3)... [('two', 2), ('friends', 2), ('long', 2), ('di... ... 0.353 Counter({'there': 3, 'but': 3, 'it': 3, 'a': 3... Counter({'two': 2, 'friends': 2, 'long': 2, 'd... i went there with two friends at 6pm. long que... went there with friends 6pm. long queue there.... ['i', 'went', 'there', 'with', 'two', 'friends... ['a_NEG', 'there', 'us_NEG', 'but_NEG', 'and_N... ['i_went', 'went_there', 'there_with', 'with_t... ['i_went', 'went_there', 'there_with', 'with_t... ['i', 'two', 'at', 'queue', 'was', 'but', 'it'...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
83 This place was one of the best restaurant I ha... P ['this', 'place', 'was', 'one', 'of', 'the', '... 70 ['This place was one of the best restaurant I ... 6 ['place', 'one', 'best', 'restaurant', 'price'... 32 [('the', 5), ('i', 3), ('and', 3), ('this', 2)... [('best', 2), ('area', 2), ('place', 1), ('one... ... 0.300 Counter({'the': 5, 'i': 3, 'and': 3, 'this': 2... Counter({'best': 2, 'area': 2, 'place': 1, 'on... this place was one of the best restaurant i ha... this place best restaurant have been. price li... ['this', 'place', 'was', 'one', 'of', 'the', '... ['the', 'i', 'and', 'this', 'best', 'is', 'are... ['this_place', 'place_was', 'was_one', 'one_of... ['this_place', 'place_was', 'was_one', 'one_of... ['was', 'one', 'of', 'the', 'i', 'the', 'is', ...
84 The best experience I ever had happened in Lon... P ['the', 'best', 'experience', 'i', 'ever', 'ha... 42 ['The best experience I ever had happened in L... 3 ['best', 'experience', 'ever', 'happened', 'lo... 21 [('the', 3), ('in', 3), ('food', 2), ('a', 2),... [('food', 2), ('best', 1), ('experience', 1), ... ... 0.283 Counter({'the': 3, 'in': 3, 'food': 2, 'a': 2,... Counter({'food': 2, 'best': 1, 'experience': 1... the best experience i ever had happened in lon... best experience ever happened london britain. ... ['the', 'best', 'experience', 'i', 'ever', 'ha... ['in', 'the', 'best', 'experience', 'i', 'ever... ['the_best', 'best_experience', 'experience_i'... ['the_best', 'best_experience', 'experience_i'... ['the', 'i', 'had', 'happened', 'in', 'london'...
85 This Japanese restaurant is so popular recentl... P ['this', 'japanese', 'restaurant', 'is', 'so',... 88 ['This Japanese restaurant is so popular recen... 12 ['japanese', 'restaurant', 'popular', 'recentl... 49 [('is', 4), ('the', 4), ('japanese', 2), ('a',... [('japanese', 2), ('food', 2), ('right', 2), (... ... 0.462 Counter({'is': 4, 'the': 4, 'japanese': 2, 'a'... Counter({'japanese': 2, 'food': 2, 'right': 2,... this japanese restaurant is so popular recentl... this japanese restaurant popular recently that... ['this', 'japanese', 'restaurant', 'is', 'so',... ['the_NEG', 'is_NEG', 'japanese', 'a', 'and_NE... ['this_japanese', 'japanese_restaurant', 'rest... ['this_japanese', 'japanese_restaurant', 'rest... ['is', 'so', 'popular', 'recently', 'as', 'a',...
86 Hibachi the grill is one of my favorite restau... P ['hibachi', 'the', 'grill', 'is', 'one', 'of',... 65 ['Hibachi the grill is one of my favorite rest... 5 ['hibachi', 'grill', 'one', 'favorite', 'resta... 30 [('the', 8), ('is', 6), ('it', 3), ('hibachi',... [('hibachi', 2), ('grill', 2), ('restaurants',... ... 0.388 Counter({'the': 8, 'is': 6, 'it': 3, 'hibachi'... Counter({'hibachi': 2, 'grill': 2, 'restaurant... hibachi the grill is one of my favorite restau... hibachi grill favorite restaurants. like drama... ['hibachi', 'the', 'grill', 'is', 'one', 'of',... ['the', 'is', 'it', 'hibachi', 'grill', 'of', ... ['hibachi_the', 'the_grill', 'grill_is', 'is_o... ['hibachi_the', 'the_grill', 'grill_is', 'is_o... ['hibachi', 'the', 'grill', 'is', 'one', 'of',...
87 I went to this ultra-luxurious restaurant in D... P ['i', 'went', 'to', 'this', 'restaurant', 'in'... 63 ['I went to this ultra-luxurious restaurant in... 5 ['went', 'restaurant', 'downtown', 'new', 'yor... 35 [('i', 4), ('this', 3), ('and', 3), ('restaura... [('restaurant', 2), ('expensive', 2), ('went',... ... 0.223 Counter({'i': 4, 'this': 3, 'and': 3, 'restaur... Counter({'restaurant': 2, 'expensive': 2, 'wen... i went to this ultra-luxurious restaurant in d... went this ultra-luxurious restaurant downtown ... ['i', 'went', 'to', 'this', 'restaurant', 'in'... ['i', 'this', 'and', 'restaurant', 'in', 'expe... ['i_went', 'went_to', 'to_this', 'this_restaur... ['i_went', 'went_to', 'to_this', 'this_restaur... ['i', 'to', 'in', 'downtown', 'new', 'york', '...

88 rows × 40 columns

In [ ]: