import numpy as np
import pandas as pd
# Load the tab-separated training set and pull out the label and text
# columns as NumPy arrays (the form handed to scikit-learn below).
# Fix: the pandas alias imported above is `pd`; `p` was undefined.
train = pd.read_csv("kaggle-sentiment/train.tsv", delimiter='\t')
y = train['Sentiment'].values
X = train['Phrase'].values
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import BernoulliNB, MultinomialNB
def runPipeline(classifier, boolean, cv):
    """Cross-validate a CountVectorizer + classifier pipeline and print the mean score.

    The pipeline is scored with ``cross_val_score`` on the module-level
    ``X`` and ``y``, and a single summary line is printed. Nothing is
    returned.

    Args:
        classifier: Estimator used as the final ('nb') pipeline step.
        boolean: Forwarded to ``CountVectorizer(binary=...)``; the first
            character of ``str(boolean)`` is shown in the summary line.
        cv: Forwarded unchanged to ``cross_val_score(cv=...)`` and echoed
            in the summary line.
    """
    nb_clf_pipe = Pipeline([
        ('vect', CountVectorizer(encoding='latin-1', binary=boolean)),
        ('nb', classifier),
    ])
    scores = cross_val_score(nb_clf_pipe, X, y, cv=cv)
    avg = scores.mean()
    pretty_line = "{} | B? {} | CV: {} | Classifier: {}"
    # Take the class name directly rather than parsing it out of the repr.
    print(pretty_line.format(avg, str(boolean)[0], cv, type(classifier).__name__))
# Sweep each (classifier, binary-features) setup over 2- and 3-fold CV.
# A fresh estimator instance is built for every run.
for make_clf, use_binary in (
    (BernoulliNB, False),
    (MultinomialNB, False),
    (MultinomialNB, True),
):
    for folds in (2, 3):
        runPipeline(make_clf(), use_binary, folds)
0.5529026288030471 | B? F | CV: 2 | Classifier: BernoulliNB
0.5531524365695574 | B? F | CV: 3 | Classifier: BernoulliNB
0.5592720169584305 | B? F | CV: 2 | Classifier: MultinomialNB
0.5595474569680894 | B? F | CV: 3 | Classifier: MultinomialNB
0.5596116298457374 | B? T | CV: 2 | Classifier: MultinomialNB
0.5601369637205256 | B? T | CV: 3 | Classifier: MultinomialNB
# Echo the label array (notebook-style bare expression; no effect in a script).
y
array([1, 2, 2, ..., 3, 2, 2])
# Load hw6_data_sentiment.csv into a DataFrame, then echo it
# (notebook-style bare expression) for inspection.
df = pd.read_csv('hw6_data_sentiment.csv')
df
0 | PoN | tokens | num_tokens | sentences | num_sentences | no_sw | num_no_sw | topwords_unfil | topwords_fil | ... | v_pos_fd | bow | bow_nosw | diy_cleaner | pruned | nltk_negs | unigram_feats | bigram_feats | bigram_feats_neg | no_shared_words | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | I went to XYZ restaurant last week and I was v... | N | ['i', 'went', 'to', 'xyz', 'restaurant', 'last... | 50 | ['I went to XYZ restaurant last week and I was... | 3 | ['went', 'xyz', 'restaurant', 'last', 'week', ... | 25 | [('was', 4), ('to', 3), ('i', 2), ('and', 2), ... | [('went', 1), ('xyz', 1), ('restaurant', 1), (... | ... | 0.186 | Counter({'was': 4, 'to': 3, 'i': 2, 'and': 2, ... | Counter({'went': 1, 'xyz': 1, 'restaurant': 1,... | i went to xyz restaurant last week and i was v... | went restaurant last week very disappointed. f... | ['i', 'went', 'to', 'xyz', 'restaurant', 'last... | ['was', 'to', 'i', 'and', 'the', 'a_NEG', 'for... | ['i_went', 'went_to', 'to_xyz', 'xyz_restauran... | ['i_went', 'went_to', 'to_xyz', 'xyz_restauran... | ['i', 'to', 'xyz', 'week', 'and', 'i', 'was', ... |
1 | In each of the diner dish there are at least o... | N | ['in', 'each', 'of', 'the', 'diner', 'dish', '... | 78 | ['In each of the diner dish there are at least... | 4 | ['diner', 'dish', 'least', 'one', 'fly', 'wait... | 31 | [('the', 6), ('in', 4), ('to', 4), ('of', 3), ... | [('want', 3), ('dish', 2), ('diner', 1), ('lea... | ... | 0.042 | Counter({'the': 6, 'in': 4, 'to': 4, 'of': 3, ... | Counter({'want': 3, 'dish': 2, 'diner': 1, 'le... | in each of the diner dish there are at least o... | each diner dish there least waiting hour dish ... | ['in', 'each', 'of', 'the', 'diner', 'dish', '... | ['to_NEG', 'the', 'want_NEG', 'the_NEG', 'in',... | ['in_each', 'each_of', 'of_the', 'the_diner', ... | ['in_each', 'each_of', 'of_the', 'the_diner', ... | ['in', 'of', 'the', 'diner', 'are', 'at', 'lea... |
2 | This is the last place you would want to dine ... | N | ['this', 'is', 'the', 'last', 'place', 'you', ... | 151 | ['This is the last place you would want to din... | 7 | ['last', 'place', 'would', 'want', 'dine', 'pr... | 61 | [('to', 10), ('the', 9), ('and', 7), ('we', 5)... | [('minutes', 3), ('place', 2), ('price', 2), (... | ... | 0.171 | Counter({'to': 10, 'the': 9, 'and': 7, 'we': 5... | Counter({'minutes': 3, 'place': 2, 'price': 2,... | this is the last place you would want to dine ... | this last place would want dine price that exp... | ['this', 'is', 'the', 'last', 'place', 'you', ... | ['to_NEG', 'the_NEG', 'and_NEG', 'we_NEG', 'ha... | ['this_is', 'is_the', 'the_last', 'last_place'... | ['this_is', 'is_the', 'the_last', 'last_place'... | ['is', 'the', 'you', 'to', 'dine', 'at', 'the'... |
3 | I went to this restaurant where I had ordered ... | N | ['i', 'went', 'to', 'this', 'restaurant', 'whe... | 75 | ['I went to this restaurant where I had ordere... | 6 | ['went', 'restaurant', 'ordered', 'complimenta... | 33 | [('i', 6), ('the', 6), ('to', 3), ('for', 3), ... | [('salad', 3), ('restaurant', 2), ('waiter', 2... | ... | 0.162 | Counter({'i': 6, 'the': 6, 'to': 3, 'for': 3, ... | Counter({'salad': 3, 'restaurant': 2, 'waiter'... | i went to this restaurant where i had ordered ... | went this restaurant where ordered complimenta... | ['i', 'went', 'to', 'this', 'restaurant', 'whe... | ['the', 'i', 'salad', 'had', 'for', 'waiter', ... | ['i_went', 'went_to', 'to_this', 'this_restaur... | ['i_went', 'went_to', 'to_this', 'this_restaur... | ['i', 'to', 'i', 'had', 'for', 'the', 'complim... |
4 | I went there with two friends at 6pm. Long que... | N | ['i', 'went', 'there', 'with', 'two', 'friends... | 73 | ['I went there with two friends at 6pm.', 'Lon... | 10 | ['went', 'two', 'friends', 'long', 'queue', 'd... | 38 | [('there', 3), ('but', 3), ('it', 3), ('a', 3)... | [('two', 2), ('friends', 2), ('long', 2), ('di... | ... | 0.353 | Counter({'there': 3, 'but': 3, 'it': 3, 'a': 3... | Counter({'two': 2, 'friends': 2, 'long': 2, 'd... | i went there with two friends at 6pm. long que... | went there with friends 6pm. long queue there.... | ['i', 'went', 'there', 'with', 'two', 'friends... | ['a_NEG', 'there', 'us_NEG', 'but_NEG', 'and_N... | ['i_went', 'went_there', 'there_with', 'with_t... | ['i_went', 'went_there', 'there_with', 'with_t... | ['i', 'two', 'at', 'queue', 'was', 'but', 'it'... |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
83 | This place was one of the best restaurant I ha... | P | ['this', 'place', 'was', 'one', 'of', 'the', '... | 70 | ['This place was one of the best restaurant I ... | 6 | ['place', 'one', 'best', 'restaurant', 'price'... | 32 | [('the', 5), ('i', 3), ('and', 3), ('this', 2)... | [('best', 2), ('area', 2), ('place', 1), ('one... | ... | 0.300 | Counter({'the': 5, 'i': 3, 'and': 3, 'this': 2... | Counter({'best': 2, 'area': 2, 'place': 1, 'on... | this place was one of the best restaurant i ha... | this place best restaurant have been. price li... | ['this', 'place', 'was', 'one', 'of', 'the', '... | ['the', 'i', 'and', 'this', 'best', 'is', 'are... | ['this_place', 'place_was', 'was_one', 'one_of... | ['this_place', 'place_was', 'was_one', 'one_of... | ['was', 'one', 'of', 'the', 'i', 'the', 'is', ... |
84 | The best experience I ever had happened in Lon... | P | ['the', 'best', 'experience', 'i', 'ever', 'ha... | 42 | ['The best experience I ever had happened in L... | 3 | ['best', 'experience', 'ever', 'happened', 'lo... | 21 | [('the', 3), ('in', 3), ('food', 2), ('a', 2),... | [('food', 2), ('best', 1), ('experience', 1), ... | ... | 0.283 | Counter({'the': 3, 'in': 3, 'food': 2, 'a': 2,... | Counter({'food': 2, 'best': 1, 'experience': 1... | the best experience i ever had happened in lon... | best experience ever happened london britain. ... | ['the', 'best', 'experience', 'i', 'ever', 'ha... | ['in', 'the', 'best', 'experience', 'i', 'ever... | ['the_best', 'best_experience', 'experience_i'... | ['the_best', 'best_experience', 'experience_i'... | ['the', 'i', 'had', 'happened', 'in', 'london'... |
85 | This Japanese restaurant is so popular recentl... | P | ['this', 'japanese', 'restaurant', 'is', 'so',... | 88 | ['This Japanese restaurant is so popular recen... | 12 | ['japanese', 'restaurant', 'popular', 'recentl... | 49 | [('is', 4), ('the', 4), ('japanese', 2), ('a',... | [('japanese', 2), ('food', 2), ('right', 2), (... | ... | 0.462 | Counter({'is': 4, 'the': 4, 'japanese': 2, 'a'... | Counter({'japanese': 2, 'food': 2, 'right': 2,... | this japanese restaurant is so popular recentl... | this japanese restaurant popular recently that... | ['this', 'japanese', 'restaurant', 'is', 'so',... | ['the_NEG', 'is_NEG', 'japanese', 'a', 'and_NE... | ['this_japanese', 'japanese_restaurant', 'rest... | ['this_japanese', 'japanese_restaurant', 'rest... | ['is', 'so', 'popular', 'recently', 'as', 'a',... |
86 | Hibachi the grill is one of my favorite restau... | P | ['hibachi', 'the', 'grill', 'is', 'one', 'of',... | 65 | ['Hibachi the grill is one of my favorite rest... | 5 | ['hibachi', 'grill', 'one', 'favorite', 'resta... | 30 | [('the', 8), ('is', 6), ('it', 3), ('hibachi',... | [('hibachi', 2), ('grill', 2), ('restaurants',... | ... | 0.388 | Counter({'the': 8, 'is': 6, 'it': 3, 'hibachi'... | Counter({'hibachi': 2, 'grill': 2, 'restaurant... | hibachi the grill is one of my favorite restau... | hibachi grill favorite restaurants. like drama... | ['hibachi', 'the', 'grill', 'is', 'one', 'of',... | ['the', 'is', 'it', 'hibachi', 'grill', 'of', ... | ['hibachi_the', 'the_grill', 'grill_is', 'is_o... | ['hibachi_the', 'the_grill', 'grill_is', 'is_o... | ['hibachi', 'the', 'grill', 'is', 'one', 'of',... |
87 | I went to this ultra-luxurious restaurant in D... | P | ['i', 'went', 'to', 'this', 'restaurant', 'in'... | 63 | ['I went to this ultra-luxurious restaurant in... | 5 | ['went', 'restaurant', 'downtown', 'new', 'yor... | 35 | [('i', 4), ('this', 3), ('and', 3), ('restaura... | [('restaurant', 2), ('expensive', 2), ('went',... | ... | 0.223 | Counter({'i': 4, 'this': 3, 'and': 3, 'restaur... | Counter({'restaurant': 2, 'expensive': 2, 'wen... | i went to this ultra-luxurious restaurant in d... | went this ultra-luxurious restaurant downtown ... | ['i', 'went', 'to', 'this', 'restaurant', 'in'... | ['i', 'this', 'and', 'restaurant', 'in', 'expe... | ['i_went', 'went_to', 'to_this', 'this_restaur... | ['i_went', 'went_to', 'to_this', 'this_restaur... | ['i', 'to', 'in', 'downtown', 'new', 'york', '... |
88 rows × 40 columns