Sentiment Analysis

TextBlob + VADER + NLTK + Naive Bayes

via this tutorial | 10-6-19

In [1]:
from textblob import TextBlob
from IPython.display import display, HTML
import os
import pandas as pd
import numpy as np
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()
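
Note: VADER's lexicon ships as a separate NLTK resource, so if the import above raises a LookupError it likely needs a one-time download first. A minimal setup sketch (assuming a standard NLTK install):

import nltk
nltk.download('vader_lexicon')  # lexicon used by SentimentIntensityAnalyzer
nltk.download('punkt')          # tokenizer models used later by word_tokenize
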
In [2]:
def get_data_from_files(path):
    # Read every file in the given directory into a list of document strings
    results = []
    for file in os.listdir(path):
        with open(path + file) as f:
            results.append(f.read())
    return results

# HW 1
neg_k = get_data_from_files('AI_NEG/')
pos_k = get_data_from_files('AI_POS/')
neg_a = get_data_from_files('NEG/')
pos_a = get_data_from_files('POS/')

# HW2
neg_cornell = get_data_from_files('neg_cornell/')
pos_cornell = get_data_from_files('pos_cornell/')

# HW3
neg_dirty = get_data_from_files('NEG_dirty/')
pos_dirty = get_data_from_files('POS_dirty/')
neg_joker = get_data_from_files('NEG_JK/')
pos_joker = get_data_from_files('POS_JK/')

# HW4
neg_hw4 = get_data_from_files('neg_hw4/')
pos_hw4 = get_data_from_files('pos_hw4/')

# HW4
false_lie_hw4 = get_data_from_files('hw4_lie_false/')
true_lie_hw4 = get_data_from_files('hw4_lie_true/')

TextBlob

In [3]:
def get_pn(num):
    # TextBlob polarity is in [-1.0, 1.0]; zero and above is treated as positive
    return 'neg' if num < 0 else 'pos'

def get_sentiment(array, label):
    # Score each document with TextBlob and keep a short excerpt for inspection
    blobs = [[TextBlob(text), text] for text in array]
    return [{'label': label,
             'prediction': get_pn(obj.sentiment.polarity),
             'sentiment': obj.sentiment.polarity,
             'length': len(text),
             'excerpt': text[:50]} for obj, text in blobs]
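
TextBlob's sentiment.polarity is a float in [-1.0, 1.0] (a subjectivity score in [0.0, 1.0] is also available), and get_pn maps a polarity of exactly 0.0 to 'pos', so neutral documents count as positive predictions. As a quick sanity check outside the case studies below (a sketch, not one of the graded cells):

print(TextBlob("The acting was wonderful.").sentiment)  # Sentiment(polarity=..., subjectivity=...)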

CASE STUDY 1: Kendra's Data

In [46]:
df_n = pd.DataFrame(get_sentiment(neg_k, 'neg'))
df_p = pd.DataFrame(get_sentiment(pos_k, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n)
display(df_p)

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction sentiment length excerpt accurate
0 neg neg -0.157143 76 WHERE ARE THE JOBS?! OH THAT'S RIGHT. ARTIFICI... yes
1 neg neg -0.750000 96 How can we trust Artificial Intelligence to dr... yes
2 neg neg -0.775000 31 I hate artificial intelligence! yes
3 neg neg -0.750000 47 My dog is terrified by artificial intelligence! yes
4 neg neg -0.750000 68 Artificial intelligence is going to melt the b... yes
label prediction sentiment length excerpt accurate
0 pos neg -0.112500 65 My dog is excited by the advancements in artif... no
1 pos neg -0.075000 133 I'm excited for my child to grow up and have t... no
2 pos neg -0.125000 31 I love artificial intelligence! no
3 pos neg -0.300000 121 Order my groceries, pay my taxes, take my kids... no
4 pos neg -0.133333 116 I'm grateful every day that my child will like... no
CORRECT PREDICT FALSE: 5 out of 5 1.0
CORRECT PREDICT TRUE: 0 out of 5 0.0
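
The evaluation boilerplate above (two DataFrames, an 'accurate' column, and per-class accuracy printouts) repeats for every case study below. It could be collapsed into one helper; the sketch here mirrors the cells as written, and the name evaluate_case is illustrative only (it is not used elsewhere in this notebook):

def evaluate_case(neg_texts, pos_texts, scorer, show=5):
    # scorer is get_sentiment (TextBlob) or get_vader_scores (VADER)
    df_n = pd.DataFrame(scorer(neg_texts, 'neg'))
    df_p = pd.DataFrame(scorer(pos_texts, 'pos'))
    for df in (df_n, df_p):
        df['accurate'] = np.where(df['label'] == df['prediction'], 'yes', 'no')
    display(df_n[:show])
    display(df_p[:show])
    correct_n = (df_n['accurate'] == 'yes').sum()
    correct_p = (df_p['accurate'] == 'yes').sum()
    print('CORRECT PREDICT FALSE:', correct_n, 'out of', len(df_n), correct_n / len(df_n))
    print('CORRECT PREDICT TRUE:', correct_p, 'out of', len(df_p), correct_p / len(df_p))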

CASE STUDY 2: Ami's Data

In [45]:
df_n = pd.DataFrame(get_sentiment(neg_a, 'neg'))
df_p = pd.DataFrame(get_sentiment(pos_a, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n)
display(df_p)

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction sentiment length excerpt accurate
0 neg neg -0.054577 3554 that's exactly how long the movie felt to me .... yes
1 neg pos 0.025467 2929 " quest for camelot " is warner bros . ' firs... no
2 neg pos 0.003334 3365 so ask yourself what " 8mm " ( " eight millime... no
3 neg pos 0.022925 4418 synopsis : a mentally unstable man undergoing ... no
4 neg pos 0.043234 3911 capsule : in 2176 on the planet mars police ta... no
label prediction sentiment length excerpt accurate
0 pos pos 0.023663 4227 films adapted from comic books have had plenty... yes
1 pos pos 0.131092 2421 you've got mail works alot better than it dese... yes
2 pos pos 0.110626 6092 " jaws " is a rare film that grabs your atten... yes
3 pos pos 0.103847 4096 every now and then a movie comes along from a ... yes
4 pos neg -0.070151 3898 moviemaking is a lot like being the general ma... no
CORRECT PREDICT FALSE: 1 out of 5 0.2
CORRECT PREDICT TRUE: 4 out of 5 0.8

CASE STUDY 3: Cornell Data

In [44]:
df_n = pd.DataFrame(get_sentiment(neg_cornell, 'neg'))
df_p = pd.DataFrame(get_sentiment(pos_cornell, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction sentiment length excerpt accurate
0 neg pos 0.026240 5953 bad . bad . \nbad . \nthat one word seems to p... no
1 neg pos 0.076040 3396 isn't it the ultimate sign of a movie's cinema... no
2 neg neg -0.128733 2762 " gordy " is not a movie , it is a 90-minute-... yes
3 neg neg -0.000485 3840 disconnect the phone line . \ndon't accept the... yes
4 neg pos 0.122770 2270 when robert forster found himself famous again... no
label prediction sentiment length excerpt accurate
0 pos pos 0.221173 4662 assume nothing . \nthe phrase is perhaps one o... yes
1 pos pos 0.089736 3839 plot : derek zoolander is a male model . \nhe ... yes
2 pos pos 0.206743 9380 i actually am a fan of the original 1961 or so... yes
3 pos pos 0.141905 2407 a movie that's been as highly built up as the ... yes
4 pos pos 0.176332 1840 " good will hunting " is two movies in one : ... yes
CORRECT PREDICT FALSE: 229 out of 1000 0.229
CORRECT PREDICT TRUE: 971 out of 1000 0.971

CASE STUDY 4: Dirty Data

In [43]:
df_n = pd.DataFrame(get_sentiment(neg_dirty, 'neg'))
df_p = pd.DataFrame(get_sentiment(pos_dirty, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction sentiment length excerpt accurate
0 neg neg -0.004665 3777 by starring in amy heckerlings clueless two ... yes
1 neg pos 0.119184 3639 i have little against remakes and updates of o... no
2 neg pos 0.100886 4247 i cant recall a previous film experience where... no
3 neg pos 0.097526 4308 the tagline for this film is : some houses ar... no
4 neg pos 0.048745 5175 warner brothers ; rated pg-13 ( mild violence ... no
label prediction sentiment length excerpt accurate
0 pos pos 0.134641 4584 for the first reel of girls town , you just ca... yes
1 pos pos 0.137134 3102 field of dreams almost defies description . al... yes
2 pos pos 0.181355 3521 meet joe black is your classic boy-meets-girl ... yes
3 pos pos 0.104101 2192 an indian runner was more than a courier . he ... yes
4 pos pos 0.204967 4955 every once in a while , when an exceptional fa... yes
CORRECT PREDICT FALSE: 227 out of 1000 0.227
CORRECT PREDICT TRUE: 972 out of 1000 0.972

CASE STUDY 5: Joker Review Data

In [42]:
df_n = pd.DataFrame(get_sentiment(neg_joker, 'neg'))
df_p = pd.DataFrame(get_sentiment(pos_joker, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction sentiment length excerpt accurate
0 neg pos 0.152083 1734 Missed Opportunity\nI had been very excited t... no
1 neg neg -0.001852 328 5/5 for Phoenix's acting..\nI don't think the... yes
2 neg pos 0.200000 145 Everyone praised an overrated movie.\nOverrat... no
3 neg neg -0.038095 350 What idiotic FIlm\nI can say that Phoenix is ... yes
4 neg pos 0.126398 711 Terrible\nThe only thing good about this movi... no
label prediction sentiment length excerpt accurate
0 pos pos 0.107162 5554 funny like a clown\nGreetings again from the ... yes
1 pos pos 0.014881 473 Only certain people can relate\nThis is a mov... yes
2 pos pos 0.008294 2509 "That's Life."\nIn an era of cinema so satura... yes
3 pos pos 0.036939 4022 Best DC movie since The Dark Knight Rises\nDC... yes
4 pos neg -0.017162 1430 unbelievable, unrelatable, a bit boring to be... no
CORRECT PREDICT FALSE: 64 out of 123 0.5203252032520326
CORRECT PREDICT TRUE: 114 out of 123 0.926829268292683

CASE STUDY 6: HW4 [Sentiment]

In [41]:
df_n = pd.DataFrame(get_sentiment(neg_hw4, 'neg'))
df_p = pd.DataFrame(get_sentiment(pos_hw4, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction sentiment length excerpt accurate
0 neg neg -0.273958 251 I went to XYZ restaurant last week and I was v... yes
1 neg pos 0.083333 359 In each of the diner dish there are at least o... no
2 neg neg -0.134722 748 This is the last place you would want to dine ... yes
3 neg neg -0.166667 378 I went to this restaurant where I had ordered ... yes
4 neg pos 0.152455 381 I went there with two friends at 6pm. Long que... no
label prediction sentiment length excerpt accurate
0 pos pos 0.626786 132 This restaurant ROCKS! I mean the food is grea... yes
1 pos pos 0.500000 441 Stronghearts cafe is the BEST! The owners have... yes
2 pos pos 0.480208 485 I went to cruise dinner in NYC with Spirit Cru... yes
3 pos pos 0.240278 404 Halos is home. I have been here numerous times... yes
4 pos pos 0.552083 324 The best restaurant I have ever been was a sma... yes
CORRECT PREDICT FALSE: 26 out of 46 0.5652173913043478
CORRECT PREDICT TRUE: 46 out of 46 1.0

CASE STUDY 7: HW4 [Deception]

In [40]:
df_n = pd.DataFrame(get_sentiment(false_lie_hw4, 'neg'))
df_p = pd.DataFrame(get_sentiment(true_lie_hw4, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction sentiment length excerpt accurate
0 neg pos 0.442752 386 Gannon’s Isle Ice Cream served the best ice cr... no
1 neg pos 0.197500 340 Hibachi the grill is one of my favorite restau... no
2 neg pos 0.353912 790 RIM KAAP One of the best Thai restaurants in t... no
3 neg pos 0.578788 391 It is a France restaurant which has Michelin t... no
4 neg pos 0.331373 710 Its hard to pick a favorite dining experience ... no
label prediction sentiment length excerpt accurate
0 pos pos 0.000000 1 ? yes
1 pos pos 0.236833 289 Twin Trees Cicero NY HUGE salad bar and high q... yes
2 pos neg -0.249762 519 The worst restaurant that I have ever eaten in... no
3 pos pos 0.000000 1 ? yes
4 pos pos 0.019481 234 I have been to a Asian restaurant in New York ... yes
CORRECT PREDICT FALSE: 14 out of 46 0.30434782608695654
CORRECT PREDICT TRUE: 34 out of 46 0.7391304347826086

VADER

In [13]:
def get_pn(num):
    # VADER compound score is in [-1.0, 1.0]; zero and above is treated as positive
    return 'neg' if num < 0 else 'pos'

def get_vader_scores(array, label):
    # Score each document with VADER and keep a short excerpt for inspection
    vader_array = []
    for sentence in array:
        ss = sid.polarity_scores(sentence)
        vader_array.append({'label': label,
                            'prediction': get_pn(ss['compound']),
                            'compound': ss['compound'],
                            'excerpt': sentence[:50]})
    return vader_array
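
VADER's compound score is a normalized value in [-1.0, 1.0]. The VADER documentation recommends calling a document positive at compound >= 0.05 and negative at compound <= -0.05, with the band in between treated as neutral; the cells below simply split at 0, so borderline-neutral texts are predicted 'pos'. A three-way variant of get_pn, shown only as a sketch (it is not used in the runs below):

def get_pn_threshold(compound, threshold=0.05):
    # conventional VADER cutoffs: >= +0.05 positive, <= -0.05 negative, else neutral
    if compound >= threshold:
        return 'pos'
    if compound <= -threshold:
        return 'neg'
    return 'neu'
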
In [14]:
neg_k
Out[14]:
["WHERE ARE THE JOBS?! OH THAT'S RIGHT. ARTIFICIAL INTELLIGENCE TOOK OUR JOBS.",
 "How can we trust Artificial Intelligence to drive our cars when they can't even hack a captcha?!",
 'I hate artificial intelligence!',
 'My dog is terrified by artificial intelligence!',
 'Artificial intelligence is going to melt the brains of our children!']

CASE STUDY 1: Kendra's Data

In [38]:
df_n = pd.DataFrame(get_vader_scores(neg_k, 'neg'))
df_p = pd.DataFrame(get_vader_scores(pos_k, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n)
display(df_p)

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction compound excerpt accurate
0 neg pos 0.5255 WHERE ARE THE JOBS?! OH THAT'S RIGHT. ARTIFICI... no
1 neg pos 0.7712 How can we trust Artificial Intelligence to dr... no
2 neg neg -0.2244 I hate artificial intelligence! yes
3 neg neg -0.2942 My dog is terrified by artificial intelligence! yes
4 neg pos 0.5255 Artificial intelligence is going to melt the b... no
label prediction compound excerpt accurate
0 pos pos 0.6705 My dog is excited by the advancements in artif... yes
1 pos pos 0.8271 I'm excited for my child to grow up and have t... yes
2 pos pos 0.8221 I love artificial intelligence! yes
3 pos pos 0.8213 Order my groceries, pay my taxes, take my kids... yes
4 pos pos 0.8402 I'm grateful every day that my child will like... yes
CORRECT PREDICT FALSE: 2 out of 5 0.4
CORRECT PREDICT TRUE: 5 out of 5 1.0

CASE STUDY 2: Ami's Data

In [37]:
df_n = pd.DataFrame(get_vader_scores(neg_a, 'neg'))
df_p = pd.DataFrame(get_vader_scores(pos_a, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n)
display(df_p)

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction compound excerpt accurate
0 neg pos 0.7836 that's exactly how long the movie felt to me .... no
1 neg neg -0.8481 " quest for camelot " is warner bros . ' firs... yes
2 neg neg -0.9753 so ask yourself what " 8mm " ( " eight millime... yes
3 neg pos 0.6824 synopsis : a mentally unstable man undergoing ... no
4 neg neg -0.9879 capsule : in 2176 on the planet mars police ta... yes
label prediction compound excerpt accurate
0 pos neg -0.5887 films adapted from comic books have had plenty... no
1 pos pos 0.9964 you've got mail works alot better than it dese... yes
2 pos pos 0.9868 " jaws " is a rare film that grabs your atten... yes
3 pos pos 0.8825 every now and then a movie comes along from a ... yes
4 pos neg -0.3525 moviemaking is a lot like being the general ma... no
CORRECT PREDICT FALSE: 3 out of 5 0.6
CORRECT PREDICT TRUE: 3 out of 5 0.6

CASE STUDY 3: Cornell Data

In [47]:
df_n = pd.DataFrame(get_vader_scores(neg_cornell, 'neg'))
df_p = pd.DataFrame(get_vader_scores(pos_cornell, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction compound excerpt accurate
0 neg pos 0.9695 bad . bad . \nbad . \nthat one word seems to p... no
1 neg pos 0.1722 isn't it the ultimate sign of a movie's cinema... no
2 neg neg -0.9970 " gordy " is not a movie , it is a 90-minute-... yes
3 neg pos 0.9861 disconnect the phone line . \ndon't accept the... no
4 neg pos 0.7445 when robert forster found himself famous again... no
label prediction compound excerpt accurate
0 pos pos 0.9985 assume nothing . \nthe phrase is perhaps one o... yes
1 pos pos 0.9853 plot : derek zoolander is a male model . \nhe ... yes
2 pos pos 0.9998 i actually am a fan of the original 1961 or so... yes
3 pos pos 0.9671 a movie that's been as highly built up as the ... yes
4 pos pos 0.9300 " good will hunting " is two movies in one : ... yes
CORRECT PREDICT FALSE: 445 out of 1000 0.445
CORRECT PREDICT TRUE: 828 out of 1000 0.828

CASE STUDY 4: Dirty Data

In [48]:
df_n = pd.DataFrame(get_vader_scores(neg_dirty, 'neg'))
df_p = pd.DataFrame(get_vader_scores(pos_dirty, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction compound excerpt accurate
0 neg neg -0.9326 by starring in amy heckerlings clueless two ... yes
1 neg pos 0.8326 i have little against remakes and updates of o... no
2 neg pos 0.9491 i cant recall a previous film experience where... no
3 neg pos 0.9854 the tagline for this film is : some houses ar... no
4 neg neg -0.8077 warner brothers ; rated pg-13 ( mild violence ... yes
label prediction compound excerpt accurate
0 pos neg -0.9888 for the first reel of girls town , you just ca... no
1 pos pos 0.9885 field of dreams almost defies description . al... yes
2 pos pos 0.9806 meet joe black is your classic boy-meets-girl ... yes
3 pos neg -0.9614 an indian runner was more than a courier . he ... no
4 pos pos 0.9992 every once in a while , when an exceptional fa... yes
CORRECT PREDICT FALSE: 454 out of 1000 0.454
CORRECT PREDICT TRUE: 824 out of 1000 0.824

CASE STUDY 5: Joker Review Data

In [49]:
df_n = pd.DataFrame(get_vader_scores(neg_joker, 'neg'))
df_p = pd.DataFrame(get_vader_scores(pos_joker, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction compound excerpt accurate
0 neg pos 0.7501 Missed Opportunity\nI had been very excited t... no
1 neg pos 0.7184 5/5 for Phoenix's acting..\nI don't think the... no
2 neg pos 0.7269 Everyone praised an overrated movie.\nOverrat... no
3 neg neg -0.6698 What idiotic FIlm\nI can say that Phoenix is ... yes
4 neg pos 0.7184 Terrible\nThe only thing good about this movi... no
label prediction compound excerpt accurate
0 pos pos 0.9976 funny like a clown\nGreetings again from the ... yes
1 pos pos 0.9231 Only certain people can relate\nThis is a mov... yes
2 pos pos 0.9796 "That's Life."\nIn an era of cinema so satura... yes
3 pos neg -0.9586 Best DC movie since The Dark Knight Rises\nDC... no
4 pos neg -0.8813 unbelievable, unrelatable, a bit boring to be... no
CORRECT PREDICT FALSE: 68 out of 123 0.5528455284552846
CORRECT PREDICT TRUE: 94 out of 123 0.7642276422764228

CASE STUDY 6: HW4 [Sentiment]

In [34]:
df_n = pd.DataFrame(get_vader_scores(neg_hw4, 'neg'))
df_p = pd.DataFrame(get_vader_scores(pos_hw4, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction compound excerpt accurate
0 neg neg -0.6807 I went to XYZ restaurant last week and I was v... yes
1 neg neg -0.6329 In each of the diner dish there are at least o... yes
2 neg pos 0.5161 This is the last place you would want to dine ... no
3 neg neg -0.5423 I went to this restaurant where I had ordered ... yes
4 neg pos 0.8842 I went there with two friends at 6pm. Long que... no
label prediction compound excerpt accurate
0 pos pos 0.9840 This restaurant ROCKS! I mean the food is grea... yes
1 pos pos 0.9702 Stronghearts cafe is the BEST! The owners have... yes
2 pos pos 0.9106 I went to cruise dinner in NYC with Spirit Cru... yes
3 pos pos 0.9349 Halos is home. I have been here numerous times... yes
4 pos pos 0.9686 The best restaurant I have ever been was a sma... yes
CORRECT PREDICT FALSE: 26 out of 46 0.5652173913043478
CORRECT PREDICT TRUE: 45 out of 46 0.9782608695652174

CASE STUDY 7: HW4 [Deception]

In [33]:
df_n = pd.DataFrame(get_vader_scores(false_lie_hw4, 'neg'))
df_p = pd.DataFrame(get_vader_scores(true_lie_hw4, 'pos'))

df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')
df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')

display(df_n[:5])
display(df_p[:5])

sum_correct_n = (df_n['accurate']=='yes').sum()
sum_correct_p = (df_p['accurate']=='yes').sum()

print('CORRECT PREDICT FALSE:', sum_correct_n, 'out of', len(df_n), sum_correct_n/len(df_n))
print('CORRECT PREDICT TRUE:', sum_correct_p, 'out of', len(df_p), sum_correct_p/len(df_p))
label prediction compound excerpt accurate
0 neg pos 0.9328 Gannon’s Isle Ice Cream served the best ice cr... no
1 neg pos 0.8885 Hibachi the grill is one of my favorite restau... no
2 neg pos 0.9915 RIM KAAP One of the best Thai restaurants in t... no
3 neg pos 0.8625 It is a France restaurant which has Michelin t... no
4 neg pos 0.9360 Its hard to pick a favorite dining experience ... no
label prediction compound excerpt accurate
0 pos pos 0.0000 ? yes
1 pos pos 0.8321 Twin Trees Cicero NY HUGE salad bar and high q... yes
2 pos neg -0.8641 The worst restaurant that I have ever eaten in... no
3 pos pos 0.0000 ? yes
4 pos pos 0.5267 I have been to a Asian restaurant in New York ... yes
CORRECT PREDICT FALSE: 13 out of 46 0.2826086956521739
CORRECT PREDICT TRUE: 32 out of 46 0.6956521739130435

NLTK with Naive Bayes

In [26]:
from nltk.classify import NaiveBayesClassifier
from nltk.tokenize import word_tokenize
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *

def get_tokens(sentence):
    # Tokenize and keep only lowercase alphabetic tokens
    tokens = word_tokenize(sentence)
    return [word.lower() for word in tokens if word.isalpha()]

def get_nltk_train_test(array, label, num_train):
    # Pair each tokenized document with its label, then split into train/test
    docs = [(get_tokens(sentence), label) for sentence in array]
    return [docs[:num_train], docs[num_train:]]


def get_nltk_NB(NEG_DATA, POS_DATA, num_train):
    train_neg, test_neg = get_nltk_train_test(NEG_DATA, 'neg', num_train)
    train_pos, test_pos = get_nltk_train_test(POS_DATA, 'pos', num_train)

    training_docs = train_neg + train_pos
    testing_docs = test_neg + test_pos

    # Build unigram features (with negation marking) over the training documents
    sentim_analyzer = SentimentAnalyzer()
    all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_docs])
    unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg)
    sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
    training_set = sentim_analyzer.apply_features(training_docs)
    test_set = sentim_analyzer.apply_features(testing_docs)

    # Train a Naive Bayes classifier and print accuracy, precision, recall, and F-measure
    trainer = NaiveBayesClassifier.train
    classifier = sentim_analyzer.train(trainer, training_set)

    for key, value in sorted(sentim_analyzer.evaluate(test_set).items()):
        print('{0}: {1}'.format(key, value))
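
Since sentim_analyzer.train returns the fitted NaiveBayesClassifier, the model itself can be inspected after training. As a small sketch (not run in the cells below, and assuming get_nltk_NB were changed to return classifier):

classifier.show_most_informative_features(10)  # unigram features with the highest neg/pos likelihood ratios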

CASE STUDY 1: Kendra's Data

In [50]:
get_nltk_NB(neg_k, pos_k, 4)
Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 1.0
F-measure [neg]: 1.0
F-measure [pos]: 1.0
Precision [neg]: 1.0
Precision [pos]: 1.0
Recall [neg]: 1.0
Recall [pos]: 1.0

CASE STUDY 2: Ami's Data

In [51]:
get_nltk_NB(neg_a, pos_a, 4)
Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.5
F-measure [neg]: 0.6666666666666666
F-measure [pos]: None
Precision [neg]: 0.5
Precision [pos]: None
Recall [neg]: 1.0
Recall [pos]: 0.0

CASE STUDY 3: Cornell Data

In [52]:
get_nltk_NB(neg_cornell, pos_cornell, 800)
Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.8125
F-measure [neg]: 0.8259860788863109
F-measure [pos]: 0.7967479674796748
Precision [neg]: 0.7705627705627706
Precision [pos]: 0.8698224852071006
Recall [neg]: 0.89
Recall [pos]: 0.735

CASE STUDY 4: Dirty Data

In [22]:
get_nltk_NB(neg_dirty, pos_dirty, 800)
Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.7775
F-measure [neg]: 0.7944572748267898
F-measure [pos]: 0.757493188010899
Precision [neg]: 0.7381974248927039
Precision [pos]: 0.8323353293413174
Recall [neg]: 0.86
Recall [pos]: 0.695

CASE STUDY 5: Joker Review Data

In [16]:
get_nltk_NB(neg_joker, pos_joker, 86)
Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.581081081081081
F-measure [neg]: 0.6593406593406593
F-measure [pos]: 0.456140350877193
Precision [neg]: 0.5555555555555556
Precision [pos]: 0.65
Recall [neg]: 0.8108108108108109
Recall [pos]: 0.35135135135135137

CASE STUDY 6: HW4 [Sentiment]

In [19]:
get_nltk_NB(neg_hw4, pos_hw4, 32)
Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.75
F-measure [neg]: 0.6956521739130435
F-measure [pos]: 0.787878787878788
Precision [neg]: 0.8888888888888888
Precision [pos]: 0.6842105263157895
Recall [neg]: 0.5714285714285714
Recall [pos]: 0.9285714285714286

CASE STUDY 7: HW4 [Deception]

In [27]:
get_nltk_NB(false_lie_hw4, true_lie_hw4, 32)
Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.5714285714285714
F-measure [neg]: 0.5714285714285714
F-measure [pos]: 0.5714285714285714
Precision [neg]: 0.5714285714285714
Precision [pos]: 0.5714285714285714
Recall [neg]: 0.5714285714285714
Recall [pos]: 0.5714285714285714