Sentiment Analysis: TextBlob + VADER

via this tutorial | 10-6-19

from textblob import TextBlob
from IPython.display import display, HTML
import os
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Shared VADER analyzer instance; get_vader_scores() calls
# sid.polarity_scores() on it for every text.
sid = SentimentIntensityAnalyzer()

def get_data_from_files(path):
    """Read every regular file directly inside ``path`` and return the contents.

    Args:
        path: Directory to read. A trailing path separator is optional.

    Returns:
        A list with one string per file, ordered by file name.

    Raises:
        OSError: If ``path`` cannot be listed or a file cannot be opened.
    """
    results = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # result (and any positional slicing done on it later) reproducible.
    for name in sorted(os.listdir(path)):
        # os.path.join works whether or not ``path`` ends with a separator.
        full_path = os.path.join(path, name)
        if not os.path.isfile(full_path):
            # Sub-directories cannot be opened as text files; skip them.
            continue
        # The context manager closes the handle even if read() raises.
        with open(full_path) as f:
            results.append(f.read())
    return results

# Load the raw text of every file in each corpus directory.
# The *_k lists come from the AI_NEG/ and AI_POS/ folders,
# the *_a lists from the NEG/ and POS/ folders.
neg_k = get_data_from_files('AI_NEG/')
pos_k = get_data_from_files('AI_POS/')
neg_a = get_data_from_files('NEG/')
pos_a = get_data_from_files('POS/')

def get_pn(num):
    """Turn a numeric score into a label.

    Returns 'neg' for scores strictly below zero and 'pos' for everything
    else, so a score of exactly zero is labelled 'pos'.
    """
    if num < 0:
        return 'neg'
    return 'pos'

def get_sentiment(array, label):
    """Score each text with TextBlob and return one summary dict per text.

    Args:
        array: Iterable of text strings.
        label: The known label, copied unchanged into every row.

    Returns:
        A list of dicts with the keys 'label', 'prediction' (get_pn of the
        polarity), 'sentiment' (the TextBlob polarity), 'length' (character
        count of the text) and 'excerpt' (the first 50 characters).
    """
    rows = []
    for text in array:
        polarity = TextBlob(text).sentiment.polarity
        rows.append({
            'label': label,
            'prediction': get_pn(polarity),
            'sentiment': polarity,
            'length': len(text),
            'excerpt': text[:50],
        })
    return rows

TextBlob

CASE STUDY 1: Kendra's Data

# TextBlob results for the AI_NEG/ and AI_POS/ texts, one table per known label.
display(pd.DataFrame(get_sentiment(neg_k, 'neg')))
display(pd.DataFrame(get_sentiment(pos_k, 'pos')))

CASE STUDY 2: Ami's Data

# TextBlob results for the NEG/ and POS/ texts, one table per known label.
display(pd.DataFrame(get_sentiment(neg_a, 'neg')))
display(pd.DataFrame(get_sentiment(pos_a, 'pos')))

VADER

def get_vader_scores(array, label):
    """Score each text with the module-level VADER analyzer ``sid``.

    Args:
        array: Iterable of text strings.
        label: The known label, copied unchanged into every row.

    Returns:
        A list of dicts with the keys 'label', 'compound' (the 'compound'
        entry of sid.polarity_scores for the text) and 'excerpt' (the first
        50 characters of the text).
    """
    return [
        {
            'label': label,
            'compound': sid.polarity_scores(text)['compound'],
            'excerpt': text[:50],
        }
        for text in array
    ]

CASE STUDY 1: Kendra's Data

# VADER compound scores for the AI_NEG/ and AI_POS/ texts, one table per known label.
display(pd.DataFrame(get_vader_scores(neg_k, 'neg')))
display(pd.DataFrame(get_vader_scores(pos_k, 'pos')))

CASE STUDY 2: Ami's Data

# VADER compound scores for the NEG/ and POS/ texts, one table per known label.
display(pd.DataFrame(get_vader_scores(neg_a, 'neg')))
display(pd.DataFrame(get_vader_scores(pos_a, 'pos')))

NLTK with Naive Bayes

from nltk.classify import NaiveBayesClassifier
from nltk.tokenize import word_tokenize
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *

def get_tokens(sentence):
    """Tokenize ``sentence`` and keep only its alphabetic tokens, lower-cased.

    Tokens for which ``str.isalpha()`` is false (punctuation, numbers,
    tokens containing apostrophes or digits) are dropped.
    """
    cleaned = []
    for token in word_tokenize(sentence):
        if token.isalpha():
            cleaned.append(token.lower())
    return cleaned

def get_nltk_train_test(array, label, n_train=4, n_test=1):
    """Tokenize labelled texts and split them positionally into train/test docs.

    The first ``n_train`` texts become training documents and the next
    ``n_test`` texts become test documents; any remaining texts are unused.
    The defaults reproduce the original fixed 4/1 split.

    Args:
        array: Iterable of text strings.
        label: Label attached to every document.
        n_train: Number of leading texts used for training.
        n_test: Number of texts after the training ones used for testing.

    Returns:
        ``[train_docs, test_docs]``, each a list of ``(tokens, label)``
        tuples where ``tokens`` is the result of ``get_tokens``.

    Raises:
        ValueError: If ``n_train`` or ``n_test`` is negative.
    """
    if n_train < 0 or n_test < 0:
        # Negative sizes would silently turn into "count from the end" slices.
        raise ValueError('n_train and n_test must be non-negative')
    # Only the first n_train + n_test texts are ever returned, so avoid
    # tokenizing the rest of the corpus.
    needed = list(array)[:n_train + n_test]
    docs = [(get_tokens(sentence), label) for sentence in needed]
    return [docs[:n_train], docs[n_train:n_train + n_test]]


def get_nltk_NB(NEG_DATA, POS_DATA):
    """Train an NLTK Naive Bayes sentiment classifier and print its metrics.

    Both corpora are split with ``get_nltk_train_test``; the training parts
    are combined to fit the classifier and the test parts are combined to
    evaluate it. Each metric returned by ``SentimentAnalyzer.evaluate`` is
    printed on its own line as ``name: value``, sorted by name.

    Args:
        NEG_DATA: Texts labelled 'neg'.
        POS_DATA: Texts labelled 'pos'.

    Returns:
        None. Results are only printed.
    """
    neg_train, neg_test = get_nltk_train_test(NEG_DATA, 'neg')
    pos_train, pos_test = get_nltk_train_test(POS_DATA, 'pos')
    train_docs = neg_train + pos_train
    held_out_docs = neg_test + pos_test

    analyzer = SentimentAnalyzer()

    # The unigram vocabulary is built from negation-marked training documents.
    marked_docs = [mark_negation(doc) for doc in train_docs]
    vocabulary = analyzer.unigram_word_feats(analyzer.all_words(marked_docs))
    analyzer.add_feat_extractor(extract_unigram_feats, unigrams=vocabulary)

    train_features = analyzer.apply_features(train_docs)
    test_features = analyzer.apply_features(held_out_docs)

    # The analyzer keeps the trained classifier internally for evaluate().
    analyzer.train(NaiveBayesClassifier.train, train_features)

    metrics = analyzer.evaluate(test_features)
    for metric_name in sorted(metrics):
        print('{0}: {1}'.format(metric_name, metrics[metric_name]))

CASE STUDY 1: Kendra's Data

# Train and evaluate Naive Bayes on the AI_NEG/ and AI_POS/ texts; metrics are printed.
get_nltk_NB(neg_k, pos_k)

Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 1.0
F-measure [neg]: 1.0
F-measure [pos]: 1.0
Precision [neg]: 1.0
Precision [pos]: 1.0
Recall [neg]: 1.0
Recall [pos]: 1.0

CASE STUDY 2: Ami's Data

# Train and evaluate Naive Bayes on the NEG/ and POS/ texts; metrics are printed.
get_nltk_NB(neg_a, pos_a)

Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.5
F-measure [neg]: 0.6666666666666666
F-measure [pos]: None
Precision [neg]: 0.5
Precision [pos]: None
Recall [neg]: 1.0
Recall [pos]: 0.0

	label	compound	excerpt
0	neg	0.5255	WHERE ARE THE JOBS?! OH THAT'S RIGHT. ARTIFICI...
1	neg	0.7712	How can we trust Artificial Intelligence to dr...
2	neg	-0.2244	I hate artificial intelligence!
3	neg	-0.2942	My dog is terrified by artificial intelligence!
4	neg	0.5255	Artificial intelligence is going to melt the b...

	label	compound	excerpt
0	pos	0.6705	My dog is excited by the advancements in artif...
1	pos	0.8271	I'm excited for my child to grow up and have t...
2	pos	0.8221	I love artificial intelligence!
3	pos	0.8213	Order my groceries, pay my taxes, take my kids...
4	pos	0.8402	I'm grateful every day that my child will like...

	label	compound	excerpt
0	neg	0.7836	that's exactly how long the movie felt to me ....
1	neg	-0.8481	" quest for camelot " is warner bros . ' firs...
2	neg	-0.9753	so ask yourself what " 8mm " ( " eight millime...
3	neg	0.6824	synopsis : a mentally unstable man undergoing ...
4	neg	-0.9879	capsule : in 2176 on the planet mars police ta...

	label	compound	excerpt
0	pos	-0.5887	films adapted from comic books have had plenty...
1	pos	0.9964	you've got mail works alot better than it dese...
2	pos	0.9868	" jaws " is a rare film that grabs your atten...
3	pos	0.8825	every now and then a movie comes along from a ...
4	pos	-0.3525	moviemaking is a lot like being the general ma...

	label	prediction	sentiment	length	excerpt
0	neg	neg	-0.157143	76	WHERE ARE THE JOBS?! OH THAT'S RIGHT. ARTIFICI...
1	neg	neg	-0.750000	96	How can we trust Artificial Intelligence to dr...
2	neg	neg	-0.775000	31	I hate artificial intelligence!
3	neg	neg	-0.750000	47	My dog is terrified by artificial intelligence!
4	neg	neg	-0.750000	68	Artificial intelligence is going to melt the b...

	label	prediction	sentiment	length	excerpt
0	pos	neg	-0.112500	65	My dog is excited by the advancements in artif...
1	pos	neg	-0.075000	133	I'm excited for my child to grow up and have t...
2	pos	neg	-0.125000	31	I love artificial intelligence!
3	pos	neg	-0.300000	121	Order my groceries, pay my taxes, take my kids...
4	pos	neg	-0.133333	116	I'm grateful every day that my child will like...

	label	prediction	sentiment	length	excerpt
0	neg	neg	-0.054577	3554	that's exactly how long the movie felt to me ....
1	neg	pos	0.025467	2929	" quest for camelot " is warner bros . ' firs...
2	neg	pos	0.003334	3365	so ask yourself what " 8mm " ( " eight millime...
3	neg	pos	0.022925	4418	synopsis : a mentally unstable man undergoing ...
4	neg	pos	0.043234	3911	capsule : in 2176 on the planet mars police ta...

	label	prediction	sentiment	length	excerpt
0	pos	pos	0.023663	4227	films adapted from comic books have had plenty...
1	pos	pos	0.131092	2421	you've got mail works alot better than it dese...
2	pos	pos	0.110626	6092	" jaws " is a rare film that grabs your atten...
3	pos	pos	0.103847	4096	every now and then a movie comes along from a ...
4	pos	neg	-0.070151	3898	moviemaking is a lot like being the general ma...