Sentiment Analysis

TextBlob + VADER + NLTK Naive Bayes

via this tutorial | 10-6-19

In [1]:
from textblob import TextBlob
from IPython.display import display, HTML
import os
import pandas as pd
import numpy as np
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()
In [2]:
def get_data_from_files(path):
    # Read every file in the directory into a list of strings
    results = []
    for file in os.listdir(path):
        with open(os.path.join(path, file)) as f:
            results.append(f.read())
    return results

# HW 1
neg_k = get_data_from_files('AI_NEG/')
pos_k = get_data_from_files('AI_POS/')
neg_a = get_data_from_files('NEG/')
pos_a = get_data_from_files('POS/')

# HW2
neg_cornell = get_data_from_files('neg_cornell/')
pos_cornell = get_data_from_files('pos_cornell/')

# HW3
neg_dirty = get_data_from_files('NEG_dirty/')
pos_dirty = get_data_from_files('POS_dirty/')
neg_joker = get_data_from_files('NEG_JK/')
pos_joker = get_data_from_files('POS_JK/')

# HW4 [Sentiment]
neg_hw4 = get_data_from_files('neg_hw4/')
pos_hw4 = get_data_from_files('pos_hw4/')

# HW4 [Deception]
false_lie_hw4 = get_data_from_files('hw4_lie_false/')
true_lie_hw4 = get_data_from_files('hw4_lie_true/')
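A slightly more defensive reader (a sketch, not what this notebook ran; the name `get_data_from_files_v2` is hypothetical) sorts the filenames so document order is deterministic, skips subdirectories, and tolerates decoding errors:

```python
from pathlib import Path

def get_data_from_files_v2(path):
    # Sorted for deterministic order; directories skipped; bad bytes ignored
    return [p.read_text(errors='ignore')
            for p in sorted(Path(path).iterdir()) if p.is_file()]
```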

TextBlob

In [3]:
def get_pn(num):
    # Polarity < 0 -> 'neg'; note that exactly 0 counts as 'pos'
    return 'neg' if num < 0 else 'pos'

def get_sentiment(array, label):
    blobs = [(TextBlob(text), text) for text in array]
    return [{'label': label,
             'prediction': get_pn(blob.sentiment.polarity),
             'sentiment': blob.sentiment.polarity,
             'length': len(text),
             'excerpt': text[:50]} for blob, text in blobs]

CASE STUDY 1: Kendra's Data

In [46]:
df_n = pd.DataFrame(get_sentiment(neg_k, 'neg'))
df_p = pd.DataFrame(get_sentiment(pos_k, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n)
display(df_p)

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction sentiment length excerpt accurate
0 neg neg -0.157143 76 WHERE ARE THE JOBS?! OH THAT'S RIGHT. ARTIFICI... yes
1 neg neg -0.750000 96 How can we trust Artificial Intelligence to dr... yes
2 neg neg -0.775000 31 I hate artificial intelligence! yes
3 neg neg -0.750000 47 My dog is terrified by artificial intelligence! yes
4 neg neg -0.750000 68 Artificial intelligence is going to melt the b... yes
label prediction sentiment length excerpt accurate
0 pos neg -0.112500 65 My dog is excited by the advancements in artif... no
1 pos neg -0.075000 133 I'm excited for my child to grow up and have t... no
2 pos neg -0.125000 31 I love artificial intelligence! no
3 pos neg -0.300000 121 Order my groceries, pay my taxes, take my kids... no
4 pos neg -0.133333 116 I'm grateful every day that my child will like... no
CORRECT PREDICT FALSE: 5 out of 5 1.0
CORRECT PREDICT TRUE: 0 out of 5 0.0
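The accuracy bookkeeping above is repeated verbatim in every case study that follows; a small helper (hypothetical, not part of the original notebook) condenses it while keeping the same 'X out of N' print format:

```python
def report_accuracy(tag, labels, predictions):
    # Count matching label/prediction pairs and print the running tally
    correct = sum(l == p for l, p in zip(labels, predictions))
    print('CORRECT PREDICT', tag + ':', correct, 'out of', len(labels),
          correct / len(labels))
    return correct / len(labels)
```

Called as, e.g., `report_accuracy('FALSE', df_n['label'], df_n['prediction'])`, it reproduces the two print lines above.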

CASE STUDY 2: Ami's Data

In [45]:
df_n = pd.DataFrame(get_sentiment(neg_a, 'neg'))
df_p = pd.DataFrame(get_sentiment(pos_a, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n)
display(df_p)

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction sentiment length excerpt accurate
0 neg neg -0.054577 3554 that's exactly how long the movie felt to me .... yes
1 neg pos 0.025467 2929 " quest for camelot " is warner bros . ' firs... no
2 neg pos 0.003334 3365 so ask yourself what " 8mm " ( " eight millime... no
3 neg pos 0.022925 4418 synopsis : a mentally unstable man undergoing ... no
4 neg pos 0.043234 3911 capsule : in 2176 on the planet mars police ta... no
label prediction sentiment length excerpt accurate
0 pos pos 0.023663 4227 films adapted from comic books have had plenty... yes
1 pos pos 0.131092 2421 you've got mail works alot better than it dese... yes
2 pos pos 0.110626 6092 " jaws " is a rare film that grabs your atten... yes
3 pos pos 0.103847 4096 every now and then a movie comes along from a ... yes
4 pos neg -0.070151 3898 moviemaking is a lot like being the general ma... no
CORRECT PREDICT FALSE: 1 out of 5 0.2
CORRECT PREDICT TRUE: 4 out of 5 0.8

CASE STUDY 3: Cornell Data

In [44]:
df_n = pd.DataFrame(get_sentiment(neg_cornell, 'neg'))
df_p = pd.DataFrame(get_sentiment(pos_cornell, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction sentiment length excerpt accurate
0 neg pos 0.026240 5953 bad . bad . \nbad . \nthat one word seems to p... no
1 neg pos 0.076040 3396 isn't it the ultimate sign of a movie's cinema... no
2 neg neg -0.128733 2762 " gordy " is not a movie , it is a 90-minute-... yes
3 neg neg -0.000485 3840 disconnect the phone line . \ndon't accept the... yes
4 neg pos 0.122770 2270 when robert forster found himself famous again... no
label prediction sentiment length excerpt accurate
0 pos pos 0.221173 4662 assume nothing . \nthe phrase is perhaps one o... yes
1 pos pos 0.089736 3839 plot : derek zoolander is a male model . \nhe ... yes
2 pos pos 0.206743 9380 i actually am a fan of the original 1961 or so... yes
3 pos pos 0.141905 2407 a movie that's been as highly built up as the ... yes
4 pos pos 0.176332 1840 " good will hunting " is two movies in one : ... yes
CORRECT PREDICT FALSE: 229 out of 1000 0.229
CORRECT PREDICT TRUE: 971 out of 1000 0.971

CASE STUDY 4: Dirty Data

In [43]:
df_n = pd.DataFrame(get_sentiment(neg_dirty, 'neg'))
df_p = pd.DataFrame(get_sentiment(pos_dirty, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction sentiment length excerpt accurate
0 neg neg -0.004665 3777 by starring in amy heckerlings clueless two ... yes
1 neg pos 0.119184 3639 i have little against remakes and updates of o... no
2 neg pos 0.100886 4247 i cant recall a previous film experience where... no
3 neg pos 0.097526 4308 the tagline for this film is : some houses ar... no
4 neg pos 0.048745 5175 warner brothers ; rated pg-13 ( mild violence ... no
label prediction sentiment length excerpt accurate
0 pos pos 0.134641 4584 for the first reel of girls town , you just ca... yes
1 pos pos 0.137134 3102 field of dreams almost defies description . al... yes
2 pos pos 0.181355 3521 meet joe black is your classic boy-meets-girl ... yes
3 pos pos 0.104101 2192 an indian runner was more than a courier . he ... yes
4 pos pos 0.204967 4955 every once in a while , when an exceptional fa... yes
CORRECT PREDICT FALSE: 227 out of 1000 0.227
CORRECT PREDICT TRUE: 972 out of 1000 0.972

CASE STUDY 5: Joker Review Data

In [42]:
df_n = pd.DataFrame(get_sentiment(neg_joker, 'neg'))
df_p = pd.DataFrame(get_sentiment(pos_joker, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction sentiment length excerpt accurate
0 neg pos 0.152083 1734 Missed Opportunity\nI had been very excited t... no
1 neg neg -0.001852 328 5/5 for Phoenix's acting..\nI don't think the... yes
2 neg pos 0.200000 145 Everyone praised an overrated movie.\nOverrat... no
3 neg neg -0.038095 350 What idiotic FIlm\nI can say that Phoenix is ... yes
4 neg pos 0.126398 711 Terrible\nThe only thing good about this movi... no
label prediction sentiment length excerpt accurate
0 pos pos 0.107162 5554 funny like a clown\nGreetings again from the ... yes
1 pos pos 0.014881 473 Only certain people can relate\nThis is a mov... yes
2 pos pos 0.008294 2509 "That's Life."\nIn an era of cinema so satura... yes
3 pos pos 0.036939 4022 Best DC movie since The Dark Knight Rises\nDC... yes
4 pos neg -0.017162 1430 unbelievable, unrelatable, a bit boring to be... no
CORRECT PREDICT FALSE: 64 out of 123 0.5203252032520326
CORRECT PREDICT TRUE: 114 out of 123 0.926829268292683

CASE STUDY 6: HW4 [Sentiment]

In [41]:
df_n = pd.DataFrame(get_sentiment(neg_hw4, 'neg'))
df_p = pd.DataFrame(get_sentiment(pos_hw4, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction sentiment length excerpt accurate
0 neg neg -0.273958 251 I went to XYZ restaurant last week and I was v... yes
1 neg pos 0.083333 359 In each of the diner dish there are at least o... no
2 neg neg -0.134722 748 This is the last place you would want to dine ... yes
3 neg neg -0.166667 378 I went to this restaurant where I had ordered ... yes
4 neg pos 0.152455 381 I went there with two friends at 6pm. Long que... no
label prediction sentiment length excerpt accurate
0 pos pos 0.626786 132 This restaurant ROCKS! I mean the food is grea... yes
1 pos pos 0.500000 441 Stronghearts cafe is the BEST! The owners have... yes
2 pos pos 0.480208 485 I went to cruise dinner in NYC with Spirit Cru... yes
3 pos pos 0.240278 404 Halos is home. I have been here numerous times... yes
4 pos pos 0.552083 324 The best restaurant I have ever been was a sma... yes
CORRECT PREDICT FALSE: 26 out of 46 0.5652173913043478
CORRECT PREDICT TRUE: 46 out of 46 1.0

CASE STUDY 7: HW4 [Deception]

In [40]:
df_n = pd.DataFrame(get_sentiment(false_lie_hw4, 'neg'))
df_p = pd.DataFrame(get_sentiment(true_lie_hw4, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction sentiment length excerpt accurate
0 neg pos 0.442752 386 Gannon’s Isle Ice Cream served the best ice cr... no
1 neg pos 0.197500 340 Hibachi the grill is one of my favorite restau... no
2 neg pos 0.353912 790 RIM KAAP One of the best Thai restaurants in t... no
3 neg pos 0.578788 391 It is a France restaurant which has Michelin t... no
4 neg pos 0.331373 710 Its hard to pick a favorite dining experience ... no
label prediction sentiment length excerpt accurate
0 pos pos 0.000000 1 ? yes
1 pos pos 0.236833 289 Twin Trees Cicero NY HUGE salad bar and high q... yes
2 pos neg -0.249762 519 The worst restaurant that I have ever eaten in... no
3 pos pos 0.000000 1 ? yes
4 pos pos 0.019481 234 I have been to a Asian restaurant in New York ... yes
CORRECT PREDICT FALSE: 14 out of 46 0.30434782608695654
CORRECT PREDICT TRUE: 34 out of 46 0.7391304347826086

VADER

In [13]:
def get_pn(num):
    # Compound score < 0 -> 'neg'; 0 or above -> 'pos'
    return 'neg' if num < 0 else 'pos'

def get_vader_scores(array, label):
    vader_array = []
    for sentence in array:
        ss = sid.polarity_scores(sentence)
        vader_array.append({'label': label,
                            'prediction': get_pn(ss['compound']),
                            'compound': ss['compound'], 
                            'excerpt': sentence[:50]})
    return vader_array
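`get_pn` treats any compound score of 0 or above as positive. The VADER authors' conventional cutoffs instead reserve a neutral band around zero; a sketch of that three-way rule (hypothetical here, since this notebook only uses two classes):

```python
def classify_compound(compound, pos_cut=0.05, neg_cut=-0.05):
    # Conventional VADER thresholds: >= 0.05 positive,
    # <= -0.05 negative, anything in between neutral
    if compound >= pos_cut:
        return 'pos'
    if compound <= neg_cut:
        return 'neg'
    return 'neu'
```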
In [14]:
neg_k
Out[14]:
["WHERE ARE THE JOBS?! OH THAT'S RIGHT. ARTIFICIAL INTELLIGENCE TOOK OUR JOBS.",
 "How can we trust Artificial Intelligence to drive our cars when they can't even hack a captcha?!",
 'I hate artificial intelligence!',
 'My dog is terrified by artificial intelligence!',
 'Artificial intelligence is going to melt the brains of our children!']

CASE STUDY 1: Kendra's Data

In [38]:
df_n = pd.DataFrame(get_vader_scores(neg_k, 'neg'))
df_p = pd.DataFrame(get_vader_scores(pos_k, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n)
display(df_p)

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction compound excerpt accurate
0 neg pos 0.5255 WHERE ARE THE JOBS?! OH THAT'S RIGHT. ARTIFICI... no
1 neg pos 0.7712 How can we trust Artificial Intelligence to dr... no
2 neg neg -0.2244 I hate artificial intelligence! yes
3 neg neg -0.2942 My dog is terrified by artificial intelligence! yes
4 neg pos 0.5255 Artificial intelligence is going to melt the b... no
label prediction compound excerpt accurate
0 pos pos 0.6705 My dog is excited by the advancements in artif... yes
1 pos pos 0.8271 I'm excited for my child to grow up and have t... yes
2 pos pos 0.8221 I love artificial intelligence! yes
3 pos pos 0.8213 Order my groceries, pay my taxes, take my kids... yes
4 pos pos 0.8402 I'm grateful every day that my child will like... yes
CORRECT PREDICT FALSE: 2 out of 5 0.4
CORRECT PREDICT TRUE: 5 out of 5 1.0

CASE STUDY 2: Ami's Data

In [37]:
df_n = pd.DataFrame(get_vader_scores(neg_a, 'neg'))
df_p = pd.DataFrame(get_vader_scores(pos_a, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n)
display(df_p)

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction compound excerpt accurate
0 neg pos 0.7836 that's exactly how long the movie felt to me .... no
1 neg neg -0.8481 " quest for camelot " is warner bros . ' firs... yes
2 neg neg -0.9753 so ask yourself what " 8mm " ( " eight millime... yes
3 neg pos 0.6824 synopsis : a mentally unstable man undergoing ... no
4 neg neg -0.9879 capsule : in 2176 on the planet mars police ta... yes
label prediction compound excerpt accurate
0 pos neg -0.5887 films adapted from comic books have had plenty... no
1 pos pos 0.9964 you've got mail works alot better than it dese... yes
2 pos pos 0.9868 " jaws " is a rare film that grabs your atten... yes
3 pos pos 0.8825 every now and then a movie comes along from a ... yes
4 pos neg -0.3525 moviemaking is a lot like being the general ma... no
CORRECT PREDICT FALSE: 3 out of 5 0.6
CORRECT PREDICT TRUE: 3 out of 5 0.6

CASE STUDY 3: Cornell Data

In [47]:
df_n = pd.DataFrame(get_vader_scores(neg_cornell, 'neg'))
df_p = pd.DataFrame(get_vader_scores(pos_cornell, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction compound excerpt accurate
0 neg pos 0.9695 bad . bad . \nbad . \nthat one word seems to p... no
1 neg pos 0.1722 isn't it the ultimate sign of a movie's cinema... no
2 neg neg -0.9970 " gordy " is not a movie , it is a 90-minute-... yes
3 neg pos 0.9861 disconnect the phone line . \ndon't accept the... no
4 neg pos 0.7445 when robert forster found himself famous again... no
label prediction compound excerpt accurate
0 pos pos 0.9985 assume nothing . \nthe phrase is perhaps one o... yes
1 pos pos 0.9853 plot : derek zoolander is a male model . \nhe ... yes
2 pos pos 0.9998 i actually am a fan of the original 1961 or so... yes
3 pos pos 0.9671 a movie that's been as highly built up as the ... yes
4 pos pos 0.9300 " good will hunting " is two movies in one : ... yes
CORRECT PREDICT FALSE: 445 out of 1000 0.445
CORRECT PREDICT TRUE: 828 out of 1000 0.828

CASE STUDY 4: Dirty Data

In [48]:
df_n = pd.DataFrame(get_vader_scores(neg_dirty, 'neg'))
df_p = pd.DataFrame(get_vader_scores(pos_dirty, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction compound excerpt accurate
0 neg neg -0.9326 by starring in amy heckerlings clueless two ... yes
1 neg pos 0.8326 i have little against remakes and updates of o... no
2 neg pos 0.9491 i cant recall a previous film experience where... no
3 neg pos 0.9854 the tagline for this film is : some houses ar... no
4 neg neg -0.8077 warner brothers ; rated pg-13 ( mild violence ... yes
label prediction compound excerpt accurate
0 pos neg -0.9888 for the first reel of girls town , you just ca... no
1 pos pos 0.9885 field of dreams almost defies description . al... yes
2 pos pos 0.9806 meet joe black is your classic boy-meets-girl ... yes
3 pos neg -0.9614 an indian runner was more than a courier . he ... no
4 pos pos 0.9992 every once in a while , when an exceptional fa... yes
CORRECT PREDICT FALSE: 454 out of 1000 0.454
CORRECT PREDICT TRUE: 824 out of 1000 0.824

CASE STUDY 5: Joker Review Data

In [49]:
df_n = pd.DataFrame(get_vader_scores(neg_joker, 'neg'))
df_p = pd.DataFrame(get_vader_scores(pos_joker, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction compound excerpt accurate
0 neg pos 0.7501 Missed Opportunity\nI had been very excited t... no
1 neg pos 0.7184 5/5 for Phoenix's acting..\nI don't think the... no
2 neg pos 0.7269 Everyone praised an overrated movie.\nOverrat... no
3 neg neg -0.6698 What idiotic FIlm\nI can say that Phoenix is ... yes
4 neg pos 0.7184 Terrible\nThe only thing good about this movi... no
label prediction compound excerpt accurate
0 pos pos 0.9976 funny like a clown\nGreetings again from the ... yes
1 pos pos 0.9231 Only certain people can relate\nThis is a mov... yes
2 pos pos 0.9796 "That's Life."\nIn an era of cinema so satura... yes
3 pos neg -0.9586 Best DC movie since The Dark Knight Rises\nDC... no
4 pos neg -0.8813 unbelievable, unrelatable, a bit boring to be... no
CORRECT PREDICT FALSE: 68 out of 123 0.5528455284552846
CORRECT PREDICT TRUE: 94 out of 123 0.7642276422764228

CASE STUDY 6: HW4 [Sentiment]

In [34]:
df_n = pd.DataFrame(get_vader_scores(neg_hw4, 'neg'))
df_p = pd.DataFrame(get_vader_scores(pos_hw4, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction compound excerpt accurate
0 neg neg -0.6807 I went to XYZ restaurant last week and I was v... yes
1 neg neg -0.6329 In each of the diner dish there are at least o... yes
2 neg pos 0.5161 This is the last place you would want to dine ... no
3 neg neg -0.5423 I went to this restaurant where I had ordered ... yes
4 neg pos 0.8842 I went there with two friends at 6pm. Long que... no
label prediction compound excerpt accurate
0 pos pos 0.9840 This restaurant ROCKS! I mean the food is grea... yes
1 pos pos 0.9702 Stronghearts cafe is the BEST! The owners have... yes
2 pos pos 0.9106 I went to cruise dinner in NYC with Spirit Cru... yes
3 pos pos 0.9349 Halos is home. I have been here numerous times... yes
4 pos pos 0.9686 The best restaurant I have ever been was a sma... yes
CORRECT PREDICT FALSE: 26 out of 46 0.5652173913043478
CORRECT PREDICT TRUE: 45 out of 46 0.9782608695652174

CASE STUDY 7: HW4 [Deception]

In [33]:
df_n = pd.DataFrame(get_vader_scores(false_lie_hw4, 'neg'))
df_p = pd.DataFrame(get_vader_scores(true_lie_hw4, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction compound excerpt accurate
0 neg pos 0.9328 Gannon’s Isle Ice Cream served the best ice cr... no
1 neg pos 0.8885 Hibachi the grill is one of my favorite restau... no
2 neg pos 0.9915 RIM KAAP One of the best Thai restaurants in t... no
3 neg pos 0.8625 It is a France restaurant which has Michelin t... no
4 neg pos 0.9360 Its hard to pick a favorite dining experience ... no
label prediction compound excerpt accurate
0 pos pos 0.0000 ? yes
1 pos pos 0.8321 Twin Trees Cicero NY HUGE salad bar and high q... yes
2 pos neg -0.8641 The worst restaurant that I have ever eaten in... no
3 pos pos 0.0000 ? yes
4 pos pos 0.5267 I have been to a Asian restaurant in New York ... yes
CORRECT PREDICT FALSE: 13 out of 46 0.2826086956521739
CORRECT PREDICT TRUE: 32 out of 46 0.6956521739130435

NLTK with NaiveBayes

In [26]:
from nltk.classify import NaiveBayesClassifier
from nltk.tokenize import word_tokenize
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *

def get_tokens(sentence):
    # Tokenize, then keep only alphabetic tokens, lowercased
    tokens = word_tokenize(sentence)
    return [word.lower() for word in tokens if word.isalpha()]

def get_nltk_train_test(array, label, num_train):
    tokens = [get_tokens(sentence) for sentence in array]
    docs = [(sent, label) for sent in tokens]
    train_docs = docs[:num_train]
    test_docs = docs[num_train:]
    return [train_docs, test_docs]


def get_nltk_NB(NEG_DATA, POS_DATA, num_train):
    train_neg, test_neg = get_nltk_train_test(NEG_DATA, 'neg', num_train)
    train_pos, test_pos = get_nltk_train_test(POS_DATA, 'pos', num_train)

    training_docs = train_neg + train_pos
    testing_docs = test_neg + test_pos

    sentim_analyzer = SentimentAnalyzer()
    all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_docs])
    unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg)
    sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
    training_set = sentim_analyzer.apply_features(training_docs)
    test_set = sentim_analyzer.apply_features(testing_docs)

    trainer = NaiveBayesClassifier.train
    classifier = sentim_analyzer.train(trainer, training_set)
    
    for key, value in sorted(sentim_analyzer.evaluate(test_set).items()):
        print('{0}: {1}'.format(key, value))
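`get_nltk_train_test` takes the first `num_train` documents in directory order, so any ordering in the corpus leaks straight into the train/test split. A shuffled split (a sketch with a hypothetical fixed seed, not what the notebook ran) avoids that; plain `str.split` stands in for `get_tokens` to keep it self-contained:

```python
import random

def get_shuffled_train_test(array, label, num_train, seed=42):
    # Same interface as get_nltk_train_test, but shuffles before splitting
    docs = [(sentence.split(), label) for sentence in array]
    random.Random(seed).shuffle(docs)
    return docs[:num_train], docs[num_train:]
```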

CASE STUDY 1: Kendra's Data

In [50]:
get_nltk_NB(neg_k, pos_k, 4)
Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 1.0
F-measure [neg]: 1.0
F-measure [pos]: 1.0
Precision [neg]: 1.0
Precision [pos]: 1.0
Recall [neg]: 1.0
Recall [pos]: 1.0

CASE STUDY 2: Ami's Data

In [51]:
get_nltk_NB(neg_a, pos_a, 4)
Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.5
F-measure [neg]: 0.6666666666666666
F-measure [pos]: None
Precision [neg]: 0.5
Precision [pos]: None
Recall [neg]: 1.0
Recall [pos]: 0.0

CASE STUDY 3: Cornell Data

In [52]:
get_nltk_NB(neg_cornell, pos_cornell, 800)
Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.8125
F-measure [neg]: 0.8259860788863109
F-measure [pos]: 0.7967479674796748
Precision [neg]: 0.7705627705627706
Precision [pos]: 0.8698224852071006
Recall [neg]: 0.89
Recall [pos]: 0.735

CASE STUDY 4: Dirty Data

In [22]:
get_nltk_NB(neg_dirty, pos_dirty, 800)
Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.7775
F-measure [neg]: 0.7944572748267898
F-measure [pos]: 0.757493188010899
Precision [neg]: 0.7381974248927039
Precision [pos]: 0.8323353293413174
Recall [neg]: 0.86
Recall [pos]: 0.695

CASE STUDY 5: Joker Review Data

In [16]:
get_nltk_NB(neg_joker, pos_joker, 86)
Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.581081081081081
F-measure [neg]: 0.6593406593406593
F-measure [pos]: 0.456140350877193
Precision [neg]: 0.5555555555555556
Precision [pos]: 0.65
Recall [neg]: 0.8108108108108109
Recall [pos]: 0.35135135135135137

CASE STUDY 6: HW4 [Sentiment]

In [19]:
get_nltk_NB(neg_hw4, pos_hw4, 32)
Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.75
F-measure [neg]: 0.6956521739130435
F-measure [pos]: 0.787878787878788
Precision [neg]: 0.8888888888888888
Precision [pos]: 0.6842105263157895
Recall [neg]: 0.5714285714285714
Recall [pos]: 0.9285714285714286

CASE STUDY 7: HW4 [Deception]

In [27]:
get_nltk_NB(false_lie_hw4, true_lie_hw4, 32)
Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.5714285714285714
F-measure [neg]: 0.5714285714285714
F-measure [pos]: 0.5714285714285714
Precision [neg]: 0.5714285714285714
Precision [pos]: 0.5714285714285714
Recall [neg]: 0.5714285714285714
Recall [pos]: 0.5714285714285714
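Identical precision, recall, and F-measure across both classes is what symmetric confusion counts produce. As a sanity check (hypothetical helper; the counts below are inferred from the printed metrics, not output by the notebook), the Case Study 7 figures are consistent with 8 true positives, 6 false positives, and 6 false negatives per class:

```python
def prf(tp, fp, fn):
    # Precision, recall, and F1 from raw confusion-matrix counts
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1 = 2 * precision * recall / (precision + recall)
    return precision, recall, f1
```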
In [50]:
type(false_lie_hw4)
Out[50]:
list
In [51]:
false_lie_hw4[0]
Out[51]:
'Gannon’s Isle Ice Cream served the best ice cream and you better believe it! The place is ideally situated and it is easy to get too. The ice cream is delicious the best I had. There were so many varieties that I had trouble choosing it. I had the chocolate and raspberry. A weird combination but the smooth sweet chocolate combined with the sharp taste of raspberry was devine! Try it!'