Sentiment Analysis: TextBlob + VADER + NLTK Naive Bayes

via this tutorial | 10-6-19

In [1]:
from textblob import TextBlob
from IPython.display import display, HTML
import os
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Shared VADER analyzer instance; get_vader_scores reads this module-level name.
sid = SentimentIntensityAnalyzer()
In [2]:
def get_data_from_files(path):
    """Read every regular file directly inside ``path`` and return their contents.

    Args:
        path: Directory to read from. A trailing path separator is optional.

    Returns:
        A list of strings, one per file, ordered by file name.

    Raises:
        OSError: If ``path`` cannot be listed or a file cannot be opened.
    """
    results = []
    # os.listdir returns entries in arbitrary order; sorting makes the result
    # reproducible, which matters because later cells slice it by position.
    for name in sorted(os.listdir(path)):
        # os.path.join works whether or not ``path`` ends with a separator.
        file_path = os.path.join(path, name)
        # Skip sub-directories (e.g. .ipynb_checkpoints); open() would fail on them.
        if not os.path.isfile(file_path):
            continue
        # The context manager closes the handle even if read() raises.
        with open(file_path) as f:
            results.append(f.read())
    return results

# Case study 1 (Kendra's data): negative / positive texts.
neg_k, pos_k = get_data_from_files('AI_NEG/'), get_data_from_files('AI_POS/')
# Case study 2 (Ami's data): negative / positive texts.
neg_a, pos_a = get_data_from_files('NEG/'), get_data_from_files('POS/')
In [3]:
def get_pn(num):
    """Map a polarity score to a label: 'neg' when below zero, otherwise 'pos'."""
    if num < 0:
        return 'neg'
    # Zero counts as positive, same as any score above zero.
    return 'pos'

def get_sentiment(array, label):
    """Score each text with TextBlob and return one record per text.

    Args:
        array: Iterable of text strings.
        label: Label stored unchanged in every record.

    Returns:
        A list of dicts with keys 'label', 'prediction' (from get_pn),
        'sentiment' (TextBlob polarity), 'length' (character count) and
        'excerpt' (first 50 characters).
    """
    records = []
    for text in array:
        polarity = TextBlob(text).sentiment.polarity
        records.append({
            'label': label,
            'prediction': get_pn(polarity),
            'sentiment': polarity,
            'length': len(text),
            'excerpt': text[:50],
        })
    return records

TEXT BLOB

CASE STUDY 1: Kendra's Data

In [4]:
# TextBlob results for case study 1: negative texts first, then positive.
display(
    pd.DataFrame(get_sentiment(neg_k, 'neg')),
    pd.DataFrame(get_sentiment(pos_k, 'pos')),
)
label prediction sentiment length excerpt
0 neg neg -0.157143 76 WHERE ARE THE JOBS?! OH THAT'S RIGHT. ARTIFICI...
1 neg neg -0.750000 96 How can we trust Artificial Intelligence to dr...
2 neg neg -0.775000 31 I hate artificial intelligence!
3 neg neg -0.750000 47 My dog is terrified by artificial intelligence!
4 neg neg -0.750000 68 Artificial intelligence is going to melt the b...
label prediction sentiment length excerpt
0 pos neg -0.112500 65 My dog is excited by the advancements in artif...
1 pos neg -0.075000 133 I'm excited for my child to grow up and have t...
2 pos neg -0.125000 31 I love artificial intelligence!
3 pos neg -0.300000 121 Order my groceries, pay my taxes, take my kids...
4 pos neg -0.133333 116 I'm grateful every day that my child will like...

CASE STUDY 2: Ami's Data

In [5]:
# TextBlob results for case study 2: negative texts first, then positive.
display(
    pd.DataFrame(get_sentiment(neg_a, 'neg')),
    pd.DataFrame(get_sentiment(pos_a, 'pos')),
)
label prediction sentiment length excerpt
0 neg neg -0.054577 3554 that's exactly how long the movie felt to me ....
1 neg pos 0.025467 2929 " quest for camelot " is warner bros . ' firs...
2 neg pos 0.003334 3365 so ask yourself what " 8mm " ( " eight millime...
3 neg pos 0.022925 4418 synopsis : a mentally unstable man undergoing ...
4 neg pos 0.043234 3911 capsule : in 2176 on the planet mars police ta...
label prediction sentiment length excerpt
0 pos pos 0.023663 4227 films adapted from comic books have had plenty...
1 pos pos 0.131092 2421 you've got mail works alot better than it dese...
2 pos pos 0.110626 6092 " jaws " is a rare film that grabs your atten...
3 pos pos 0.103847 4096 every now and then a movie comes along from a ...
4 pos neg -0.070151 3898 moviemaking is a lot like being the general ma...

VADER

In [7]:
def get_vader_scores(array, label):
    """Score each text with the module-level VADER analyzer ``sid``.

    Args:
        array: Iterable of text strings.
        label: Label stored unchanged in every record.

    Returns:
        A list of dicts with keys 'label', 'compound' (the 'compound' entry
        of ``sid.polarity_scores``) and 'excerpt' (first 50 characters).
    """
    return [
        {
            'label': label,
            'compound': sid.polarity_scores(sentence)['compound'],
            'excerpt': sentence[:50],
        }
        for sentence in array
    ]

CASE STUDY 1: Kendra's Data

In [10]:
# VADER results for case study 1: negative texts first, then positive.
display(
    pd.DataFrame(get_vader_scores(neg_k, 'neg')),
    pd.DataFrame(get_vader_scores(pos_k, 'pos')),
)
label compound excerpt
0 neg 0.5255 WHERE ARE THE JOBS?! OH THAT'S RIGHT. ARTIFICI...
1 neg 0.7712 How can we trust Artificial Intelligence to dr...
2 neg -0.2244 I hate artificial intelligence!
3 neg -0.2942 My dog is terrified by artificial intelligence!
4 neg 0.5255 Artificial intelligence is going to melt the b...
label compound excerpt
0 pos 0.6705 My dog is excited by the advancements in artif...
1 pos 0.8271 I'm excited for my child to grow up and have t...
2 pos 0.8221 I love artificial intelligence!
3 pos 0.8213 Order my groceries, pay my taxes, take my kids...
4 pos 0.8402 I'm grateful every day that my child will like...

CASE STUDY 2: Ami's Data

In [12]:
# VADER results for case study 2: negative texts first, then positive.
display(
    pd.DataFrame(get_vader_scores(neg_a, 'neg')),
    pd.DataFrame(get_vader_scores(pos_a, 'pos')),
)
label compound excerpt
0 neg 0.7836 that's exactly how long the movie felt to me ....
1 neg -0.8481 " quest for camelot " is warner bros . ' firs...
2 neg -0.9753 so ask yourself what " 8mm " ( " eight millime...
3 neg 0.6824 synopsis : a mentally unstable man undergoing ...
4 neg -0.9879 capsule : in 2176 on the planet mars police ta...
label compound excerpt
0 pos -0.5887 films adapted from comic books have had plenty...
1 pos 0.9964 you've got mail works alot better than it dese...
2 pos 0.9868 " jaws " is a rare film that grabs your atten...
3 pos 0.8825 every now and then a movie comes along from a ...
4 pos -0.3525 moviemaking is a lot like being the general ma...

NLTK with Naive Bayes

In [73]:
from nltk.classify import NaiveBayesClassifier
from nltk.tokenize import word_tokenize
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *

def get_tokens(sentence):
    """Tokenize ``sentence`` and return its purely alphabetic tokens, lowercased."""
    kept = []
    for token in word_tokenize(sentence):
        # Drop anything that is not purely alphabetic (numbers, punctuation, ...).
        if token.isalpha():
            kept.append(token.lower())
    return kept

def get_nltk_train_test(array, label, train_size=4, test_size=1):
    """Tokenize texts, attach ``label``, and split them into train and test docs.

    The split is positional: the first ``train_size`` documents are used for
    training and the next ``test_size`` documents for testing. Any remaining
    documents are ignored. The defaults reproduce the original fixed 4/1 split.

    Args:
        array: Sequence of text strings.
        label: Label paired with every tokenized document.
        train_size: Number of leading documents used for training.
        test_size: Number of documents after the training slice used for testing.

    Returns:
        A two-element list ``[train_docs, test_docs]``; each element is a list
        of ``(tokens, label)`` tuples.
    """
    docs = [(get_tokens(sentence), label) for sentence in array]
    train_docs = docs[:train_size]
    test_docs = docs[train_size:train_size + test_size]
    return [train_docs, test_docs]


def get_nltk_NB(NEG_DATA, POS_DATA):
    """Train an NLTK Naive Bayes sentiment classifier and print its evaluation.

    Each dataset is split with get_nltk_train_test. Unigram features are built
    from the negation-marked training documents, the classifier is trained on
    the training split, and every evaluation metric for the test split is
    printed as ``name: value`` in sorted name order.

    Args:
        NEG_DATA: Sequence of texts labelled 'neg'.
        POS_DATA: Sequence of texts labelled 'pos'.

    Returns:
        None. Results are printed only.
    """
    neg_train, neg_test = get_nltk_train_test(NEG_DATA, 'neg')
    pos_train, pos_test = get_nltk_train_test(POS_DATA, 'pos')

    train_docs = neg_train + pos_train
    test_docs = neg_test + pos_test

    analyzer = SentimentAnalyzer()

    # Build the unigram vocabulary from the negation-marked training documents.
    negated_docs = [mark_negation(doc) for doc in train_docs]
    vocabulary = analyzer.all_words(negated_docs)
    analyzer.add_feat_extractor(
        extract_unigram_feats,
        unigrams=analyzer.unigram_word_feats(vocabulary),
    )

    # The feature extractor must be registered before features are applied.
    train_features = analyzer.apply_features(train_docs)
    test_features = analyzer.apply_features(test_docs)

    analyzer.train(NaiveBayesClassifier.train, train_features)
    metrics = analyzer.evaluate(test_features)
    for metric_name in sorted(metrics):
        print('{0}: {1}'.format(metric_name, metrics[metric_name]))

CASE STUDY 1: Kendra's Data

In [74]:
get_nltk_NB(neg_k, pos_k)
Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 1.0
F-measure [neg]: 1.0
F-measure [pos]: 1.0
Precision [neg]: 1.0
Precision [pos]: 1.0
Recall [neg]: 1.0
Recall [pos]: 1.0

CASE STUDY 2: Ami's Data

In [75]:
get_nltk_NB(neg_a, pos_a)
Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.5
F-measure [neg]: 0.6666666666666666
F-measure [pos]: None
Precision [neg]: 0.5
Precision [pos]: None
Recall [neg]: 1.0
Recall [pos]: 0.0
In [ ]: