HW2: VECTORIZATION (Pandas style!)¶

STEP 1: Import ALL the things¶

Import libraries¶

##########################################
# NOTE: I'm toying with the idea of requiring the library just above 
# when I use it so it makes more sense in context
##########################################
# import os
# import pandas as pd
# from nltk.tokenize import word_tokenize, sent_tokenize
# from nltk.sentiment import SentimentAnalyzer
# from nltk.sentiment.util import *
# from nltk.probability import FreqDist
# from nltk.sentiment.vader import SentimentIntensityAnalyzer
# sid = SentimentIntensityAnalyzer()

Import data from files¶

import os
def get_data_from_files(path):
    """Read every file in a directory and return the contents as a list.

    Args:
        path: Directory holding the files to load. A trailing path
            separator is optional.

    Returns:
        A list of strings, one per directory entry, ordered by file name.
    """
    results = []
    # os.listdir() yields entries in arbitrary order; sorting keeps the order
    # of the returned list reproducible from run to run.
    for file_name in sorted(os.listdir(path)):
        # The context manager closes the handle even if read() raises.
        with open(os.path.join(path, file_name)) as f:
            results.append(f.read())
    return results

# neg = get_data_from_files('../neg_cornell/')
# pos = get_data_from_files('../pos_cornell/')

# v1
# neg = get_data_from_files('../hw4_lie_false/')
# pos = get_data_from_files('../hw4_lie_true/')

# Load one document per file from each directory.
# NOTE(review): `pos` is read from hw4_lie_false and `neg` from hw4_lie_true,
# the reverse of the commented-out "v1" assignment above -- confirm that this
# mapping is intentional.
pos = get_data_from_files('../hw4_lie_false/')
neg = get_data_from_files('../hw4_lie_true/')

# neg = get_data_from_files('../neg_hw4/')
# pos = get_data_from_files('../pos_hw4/')

STEP 2: Prep Data¶

STEP 2a: Turn that fresh text into a pandas DF¶

import pandas as pd
# One single-column frame per class; the text ends up in the column labelled 0.
neg_df, pos_df = (pd.DataFrame(texts) for texts in (neg, pos))

STEP 2b: Label it¶

# Tag every row of each frame with its class label in the 'PoN' column.
for frame, label in ((pos_df, 'P'), (neg_df, 'N')):
    frame['PoN'] = label

STEP 2c: Combine the dfs¶

# DataFrame.append() was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat() is the supported equivalent. Row labels are deliberately not
# reset, so each label still appears once per class, exactly as before.
all_df = pd.concat([neg_df, pos_df])

all_df

STEP 3: TOKENIZE (and clean)!!¶

from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *

## Came back and added sentence tokenization for the "Summary experiment"
def get_sentence_tokens(review):
    """Split one review into a list of sentence strings."""
    sentences = sent_tokenize(review)
    return sentences


# Operate on the raw-text column (label 0) directly instead of whole rows.
all_df['sentences'] = all_df[0].apply(get_sentence_tokens)
all_df['num_sentences'] = all_df['sentences'].apply(len)

def get_tokens(sentence):
    """Tokenize text into lower-cased, purely alphabetic word tokens."""
    clean_tokens = []
    for token in word_tokenize(sentence):
        # isalpha() filters out punctuation, numbers and mixed tokens.
        if token.isalpha():
            clean_tokens.append(token.lower())
    return clean_tokens


# Word tokens for every review, plus how many there are.
all_df['tokens'] = all_df[0].apply(get_tokens)
all_df['num_tokens'] = all_df['tokens'].apply(len)

all_df

STEP 4: Remove Stopwords¶

from nltk.corpus import stopwords
# A set gives constant-time membership tests inside remove_stopwords().
stop_words = set(stopwords.words("english"))


def remove_stopwords(sentence):
    """Return the tokens of ``sentence`` that are not in ``stop_words``."""
    return [word for word in sentence if word not in stop_words]


# Stop-word-free tokens for every review, plus how many remain.
all_df['no_sw'] = all_df['tokens'].apply(remove_stopwords)
all_df['num_no_sw'] = all_df['no_sw'].apply(len)

all_df

STEP 5: Create a Frequency Distribution¶

from nltk.probability import FreqDist
def get_most_common(tokens, n=12):
    """Return the ``n`` most frequent tokens together with their counts.

    Args:
        tokens: Iterable of tokens to count.
        n: How many entries to return; defaults to 12.

    Returns:
        A list of ``(token, count)`` pairs, most frequent first.
    """
    return FreqDist(tokens).most_common(n)


# Top words before stop-word removal.
all_df['topwords_unfil'] = all_df['tokens'].apply(get_most_common)

# NOTE(review): this repeats the get_most_common definition that appears
# earlier in the file; the two behave identically and one could be removed.
def get_most_common(tokens, n=12):
    """Return the ``n`` most frequent tokens together with their counts.

    Args:
        tokens: Iterable of tokens to count.
        n: How many entries to return; defaults to 12.

    Returns:
        A list of ``(token, count)`` pairs, most frequent first.
    """
    return FreqDist(tokens).most_common(n)


# Top words after stop-word removal.
all_df['topwords_fil'] = all_df['no_sw'].apply(get_most_common)

def get_fdist(tokens):
    """Build a ``FreqDist`` (token -> count) from a list of tokens."""
    fdist = FreqDist(tokens)
    return fdist


# Full frequency distributions: filtered (no stop words) and unfiltered.
all_df['freq_dist'] = all_df['no_sw'].apply(get_fdist)
all_df['freq_dist_unfil'] = all_df['tokens'].apply(get_fdist)

all_df

STEP 6: Try Different Sentiment Analysis Tools¶

VADER¶

from nltk.sentiment.vader import SentimentIntensityAnalyzer
# One shared analyzer instance, reused for every piece of text scored below.
sid = SentimentIntensityAnalyzer()


def get_vader_score(review):
    """Return the analyzer's polarity-score mapping for ``review``."""
    scores = sid.polarity_scores(review)
    return scores


# Score the full raw text (column 0) of every review.
all_df['vader_all'] = all_df[0].apply(get_vader_score)

def separate_vader_score(vader_score, key):
    """Pick a single component out of a score mapping.

    Raises ``KeyError`` when ``key`` is not present.
    """
    component = vader_score[key]
    return component

# One numeric column per VADER component, created in the original order.
for col_name, score_key in (('v_compound', 'compound'),
                            ('v_neg', 'neg'),
                            ('v_neu', 'neu'),
                            ('v_pos', 'pos')):
    all_df[col_name] = all_df['vader_all'].apply(separate_vader_score, key=score_key)

DIY SUMMARY¶

all_df[0][17]

17    Halos is home. I have been here numerous times...
17    I went to Joeys and had the best lasagna on th...
Name: 0, dtype: object

def get_weighted_freq_dist(review, freq_dist):
    """Normalise word counts in place so the most frequent word scores 1.0.

    Args:
        review: Unused; kept so existing call sites keep working.
        freq_dist: Mutable mapping of word -> count. It is modified in place.

    Returns:
        The same ``freq_dist`` object with every value divided by the
        largest value, or the string ``'nope'`` when there is nothing to
        normalise (empty/None input, or a largest value of zero).
    """
    # Explicit guards replace the former bare ``except``, which also hid
    # unrelated errors. The 'nope' sentinel is kept for existing callers.
    if not freq_dist:
        return 'nope'
    max_freq = max(freq_dist.values())
    if max_freq == 0:
        return 'nope'
    # NOTE(review): the update is in place, so anything else holding this
    # object (e.g. the 'freq_dist' column it came from) sees the normalised
    # values as well.
    for word in freq_dist:
        freq_dist[word] = freq_dist[word] / max_freq
    return freq_dist

# Pair each review's sentences with its frequency distribution, row by row.
all_df['weighted_freq_dist'] = [
    get_weighted_freq_dist(sentences, fdist)
    for sentences, fdist in zip(all_df['sentences'], all_df['freq_dist'])
]

def get_sentence_score(review, freq_dist):
    """Score each sentence by summing the frequencies of its known words.

    Args:
        review: Iterable of sentence strings.
        freq_dist: Mapping of lower-cased word -> frequency (or weight).

    Returns:
        A dict mapping sentence -> summed score. Sentences with 30 or more
        space-separated words, and sentences containing no word from
        ``freq_dist``, are left out.
    """
    # Imported here so the function does not depend on a module-level name
    # ``nltk``, which this file never imports explicitly.
    from nltk.tokenize import word_tokenize

    sentence_scores = {}
    for sent in review:
        # The length filter does not depend on the word, so test it once per
        # sentence rather than once per token.
        if len(sent.split(' ')) >= 30:
            continue
        for word in word_tokenize(sent.lower()):
            if word in freq_dist:
                sentence_scores[sent] = sentence_scores.get(sent, 0) + freq_dist[word]
    return sentence_scores

# Score the sentences of every review against that review's own frequencies.
all_df['sentence_scores'] = [
    get_sentence_score(sentences, fdist)
    for sentences, fdist in zip(all_df['sentences'], all_df['freq_dist'])
]

def get_summary_sentences(sentence_scores, num_sentences=5):
    """Build a summary from the highest-scoring sentences.

    Args:
        sentence_scores: Mapping of sentence -> score.
        num_sentences: Maximum number of sentences to keep; defaults to 5.

    Returns:
        The top sentences, highest score first, separated by single spaces.
        An empty mapping yields ``''``.
    """
    sorted_sentences = sorted(sentence_scores.items(), key=lambda kv: kv[1], reverse=True)
    # Join with a space: without a separator the last word of one sentence
    # fuses with the first word of the next ("great.The"), which corrupts
    # the tokens at every sentence boundary.
    return ' '.join(sent for sent, _ in sorted_sentences[:num_sentences])

# Turn each review's sentence scores into a short summary string.
all_df['summary_sentences'] = all_df['sentence_scores'].apply(get_summary_sentences)

# Plain Python list of the summaries, in row order.
summaries = list(all_df['summary_sentences'])

summaries[3]

''

Doing VADER on the Summary Section¶

# VADER scores computed on the summary text only.
all_df['vader_sum_all'] = all_df['summary_sentences'].apply(get_vader_score)

# Break the score mapping out into one numeric column per component.
for col_name, score_key in (('v_compound_sum', 'compound'),
                            ('v_neg_sum', 'neg'),
                            ('v_neu_sum', 'neu'),
                            ('v_pos_sum', 'pos')):
    all_df[col_name] = all_df['vader_sum_all'].apply(separate_vader_score, key=score_key)

Doing VADER on the Most Frequent Words¶

def get_freq_words(freq_dist):
    """Return up to 50 of the highest-count words as one space-separated string.

    Words are ordered by descending count; ties keep the mapping's own order.
    """
    ranked = sorted(freq_dist, key=freq_dist.get, reverse=True)
    top_words = ranked[:50]
    return ' '.join(top_words)

# Most frequent stop-word-filtered words of each review, as one string.
all_df['v_freq_words'] = all_df['freq_dist'].apply(get_freq_words)

# VADER scores for that string, then one numeric column per component.
all_df['vader_fq_all'] = all_df['v_freq_words'].apply(get_vader_score)
for col_name, score_key in (('v_compound_fd', 'compound'),
                            ('v_neg_fd', 'neg'),
                            ('v_neu_fd', 'neu'),
                            ('v_pos_fd', 'pos')):
    all_df[col_name] = all_df['vader_fq_all'].apply(separate_vader_score, key=score_key)

STEP 7: Test `Step 6` with Machine Learning!!¶

Naive Bayes¶

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

def get_NB(small_df, labels, test_size=0.3, random_state=109):
    """Fit a Gaussian Naive Bayes model and print its hold-out accuracy.

    Args:
        small_df: DataFrame of feature columns, one row per document.
        labels: Class labels aligned row-for-row with ``small_df``.
        test_size: Share of the rows held out for evaluation; defaults to 0.3.
        random_state: Seed for the train/test split so that repeated runs
            use the same partition; defaults to 109.
    """
    from sklearn import metrics

    x_train, x_test, y_train, y_test = train_test_split(
        small_df.values, labels, test_size=test_size, random_state=random_state)

    gnb = GaussianNB()
    gnb.fit(x_train, y_train)
    y_pred = gnb.predict(x_test)
    print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

TEST 1: Vader Scores (Original)¶

# Features: VADER scores of the full review text.
full_text_cols = ['v_compound', 'v_pos', 'v_neg', 'v_neu']
small_df = all_df.filter(items=full_text_cols)  # 0.645
get_NB(small_df, all_df['PoN'])

Accuracy: 0.5

TEST 2: Vader Scores (from Summary)¶

# Features: VADER scores of the summary text.
summary_cols = ['v_compound_sum', 'v_pos_sum', 'v_neg_sum', 'v_neu_sum']
small_df = all_df.filter(items=summary_cols)  # 0.59
get_NB(small_df, all_df['PoN'])

Accuracy: 0.6071428571428571

TEST 3: Vader Scores (original) AND Vader Scores (summary)¶

# Features: summary scores followed by full-text scores.
summary_and_full_cols = [
    'v_compound_sum', 'v_pos_sum', 'v_neg_sum', 'v_neu_sum',
    'v_compound', 'v_pos', 'v_neg', 'v_neu',
]
small_df = all_df.filter(items=summary_and_full_cols)  # 0.618
get_NB(small_df, all_df['PoN'])

Accuracy: 0.5714285714285714

TEST 4: Vader Scores (50 most frequent -- filtered -- words)¶

# Features: VADER scores of the most frequent filtered words.
freq_word_cols = ['v_compound_fd', 'v_pos_fd', 'v_neu_fd', 'v_neg_fd']
small_df = all_df.filter(items=freq_word_cols)  # 0.598
get_NB(small_df, all_df['PoN'])

Accuracy: 0.6428571428571429

TEST 5: All `compound` Vader Scores¶

# Features: only the compound score from each of the three variants.
compound_cols = ['v_compound_fd', 'v_compound_sum', 'v_compound']
small_df = all_df.filter(items=compound_cols)  # 0.615
get_NB(small_df, all_df['PoN'])

Accuracy: 0.6071428571428571

TEST 6: ALL THE NUMBERS!!¶

# Features: summary, frequent-word and full-text scores together.
all_score_cols = [
    'v_compound_sum', 'v_pos_sum', 'v_neg_sum', 'v_neu_sum',
    'v_compound_fd', 'v_pos_fd', 'v_neg_fd', 'v_neu_fd',
    'v_compound', 'v_pos', 'v_neg', 'v_neu',
]
small_df = all_df.filter(items=all_score_cols)  # 0.613
get_NB(small_df, all_df['PoN'])

Accuracy: 0.6071428571428571

TEST 7: Test UNFILTERED most frequent words¶

# NOTE(review): same behaviour as the get_freq_words defined earlier in the file.
def get_freq_words(freq_dist):
    """Join up to 50 of the highest-count words with single spaces.

    Highest count comes first; equal counts keep the mapping's own order.
    """
    by_count = sorted(freq_dist, key=freq_dist.get, reverse=True)
    return ' '.join(by_count[:50])

# Most frequent words of each review, stop words included, as one string.
all_df['v_freq_words_unfil'] = all_df['freq_dist_unfil'].apply(get_freq_words)

# VADER scores for that string.
all_df['vader_fd_all_unfil'] = all_df['v_freq_words_unfil'].apply(get_vader_score)

# One numeric column per component.
for col_name, score_key in (('v_compound_fd_uf', 'compound'),
                            ('v_neg_fd_uf', 'neg'),
                            ('v_neu_fd_uf', 'neu'),
                            ('v_pos_fd_uf', 'pos')):
    all_df[col_name] = all_df['vader_fd_all_unfil'].apply(separate_vader_score, key=score_key)

# Features: every score variant, including the unfiltered frequent words.
every_score_cols = [
    'v_compound_sum', 'v_pos_sum', 'v_neg_sum', 'v_neu_sum',
    'v_compound_fd', 'v_pos_fd', 'v_neg_fd', 'v_neu_fd',
    'v_compound_fd_uf', 'v_pos_fd_uf', 'v_neg_fd_uf', 'v_neu_fd_uf',
    'v_compound', 'v_pos', 'v_neg', 'v_neu',
]
small_df = all_df.filter(items=every_score_cols)  # 0.618
get_NB(small_df, all_df['PoN'])

Accuracy: 0.6071428571428571

# Features: VADER scores of the unfiltered frequent words only.
unfiltered_cols = ['v_compound_fd_uf', 'v_pos_fd_uf', 'v_neg_fd_uf', 'v_neu_fd_uf']
small_df = all_df.filter(items=unfiltered_cols)  # 0.603
get_NB(small_df, all_df['PoN'])

Accuracy: 0.5357142857142857

# Rows of each class, selected with a boolean mask on the label column.
summaries_pos = all_df.loc[all_df['PoN'] == 'P']
summaries_neg = all_df.loc[all_df['PoN'] == 'N']

# Summary strings of each class as plain lists.
summaries_pos_list = list(summaries_pos['summary_sentences'])
summaries_neg_list = list(summaries_neg['summary_sentences'])

STEP 8: Test NLTK: Naive Bayes from HW1¶

from nltk.classify import NaiveBayesClassifier
from nltk.tokenize import word_tokenize
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *

def get_tokens(sentence):
    """Return the alphabetic word tokens of ``sentence``, lower-cased."""
    lowered = []
    for token in word_tokenize(sentence):
        # Tokens containing anything but letters (punctuation, digits) are skipped.
        if not token.isalpha():
            continue
        lowered.append(token.lower())
    return lowered

def get_nltk_train_test(array, label, num_train):
    """Tokenize texts, attach a label and split into train and test parts.

    Args:
        array: Sequence of raw text strings.
        label: Label paired with every document.
        num_train: Number of leading documents that form the training part.

    Returns:
        ``[train_docs, test_docs]``, each a list of ``(tokens, label)`` tuples.
    """
    labelled = [(get_tokens(text), label) for text in array]
    head = labelled[:num_train]
    tail = labelled[num_train:len(array)]
    return [head, tail]


def get_nltk_NB(NEG_DATA, POS_DATA, num_train):
    """Train and evaluate an NLTK Naive Bayes classifier on unigram features.

    The first ``num_train`` documents of each class are used for training and
    the remaining ones for testing. The evaluation results are printed one
    per line, sorted by metric name.

    Args:
        NEG_DATA: Raw texts that get the label 'neg'.
        POS_DATA: Raw texts that get the label 'pos'.
        num_train: Number of documents per class used for training.
    """
    train_neg, test_neg = get_nltk_train_test(NEG_DATA, 'neg', num_train)
    train_pos, test_pos = get_nltk_train_test(POS_DATA, 'pos', num_train)

    training_docs = train_neg + train_pos
    testing_docs = test_neg + test_pos

    sentim_analyzer = SentimentAnalyzer()
    # Unigram vocabulary is taken from the negation-marked training documents.
    all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_docs])
    unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg)
    sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
    training_set = sentim_analyzer.apply_features(training_docs)
    test_set = sentim_analyzer.apply_features(testing_docs)

    # The return value is not bound: evaluate() below is called without a
    # classifier argument, so only the analyzer object is needed afterwards.
    trainer = NaiveBayesClassifier.train
    sentim_analyzer.train(trainer, training_set)

    for key, value in sorted(sentim_analyzer.evaluate(test_set).items()):
        print('{0}: {1}'.format(key, value))

# Raw texts (column 0) of each class as plain lists for the NLTK pipeline.
neg_df = all_df.loc[all_df['PoN'] == 'N']
neg_df_list = list(neg_df[0])

pos_df = all_df.loc[all_df['PoN'] == 'P']
pos_df_list = list(pos_df[0])

# The first 32 documents of each class train the model; the rest test it.
get_nltk_NB(neg_df_list, pos_df_list, 32)

Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.5714285714285714
F-measure [neg]: 0.5714285714285714
F-measure [pos]: 0.5714285714285714
Precision [neg]: 0.5714285714285714
Precision [pos]: 0.5714285714285714
Recall [neg]: 0.5714285714285714
Recall [pos]: 0.5714285714285714

	0	PoN
0	?	N
1	Twin Trees Cicero NY HUGE salad bar and high q...	N
2	The worst restaurant that I have ever eaten in...	N
3	?	N
4	I have been to a Asian restaurant in New York ...	N
...	...	...
41	Mikes Pizza High Point NY Service was very slo...	P
42	After I went shopping with some of my friend w...	P
43	I entered the restaurant and a waitress came b...	P
44	Carlos Plate Shack was the worst dining experi...	P
45	Olive Oil Garden was very disappointing. I exp...	P

	0	PoN	sentences	num_sentences	tokens	num_tokens
0	?	N	[?]	1	[]	0
1	Twin Trees Cicero NY HUGE salad bar and high q...	N	[Twin Trees Cicero NY HUGE salad bar and high ...	4	[twin, trees, cicero, ny, huge, salad, bar, an...	53
2	The worst restaurant that I have ever eaten in...	N	[The worst restaurant that I have ever eaten i...	5	[the, worst, restaurant, that, i, have, ever, ...	105
3	?	N	[?]	1	[]	0
4	I have been to a Asian restaurant in New York ...	N	[I have been to a Asian restaurant in New York...	4	[i, have, been, to, a, asian, restaurant, in, ...	45
...	...	...	...	...	...	...
41	Mikes Pizza High Point NY Service was very slo...	P	[Mikes Pizza High Point NY Service was very sl...	4	[mikes, pizza, high, point, ny, service, was, ...	43
42	After I went shopping with some of my friend w...	P	[After I went shopping with some of my friend ...	2	[after, i, went, shopping, with, some, of, my,...	24
43	I entered the restaurant and a waitress came b...	P	[I entered the restaurant and a waitress came ...	5	[i, entered, the, restaurant, and, a, waitress...	99
44	Carlos Plate Shack was the worst dining experi...	P	[Carlos Plate Shack was the worst dining exper...	9	[carlos, plate, shack, was, the, worst, dining...	155
45	Olive Oil Garden was very disappointing. I exp...	P	[Olive Oil Garden was very disappointing., I e...	5	[olive, oil, garden, was, very, disappointing,...	43

	0	PoN	sentences	num_sentences	tokens	num_tokens	no_sw	num_no_sw
0	?	N	[?]	1	[]	0	[]	0
1	Twin Trees Cicero NY HUGE salad bar and high q...	N	[Twin Trees Cicero NY HUGE salad bar and high ...	4	[twin, trees, cicero, ny, huge, salad, bar, an...	53	[twin, trees, cicero, ny, huge, salad, bar, hi...	32
2	The worst restaurant that I have ever eaten in...	N	[The worst restaurant that I have ever eaten i...	5	[the, worst, restaurant, that, i, have, ever, ...	105	[worst, restaurant, ever, eaten, undoubtedly, ...	49
3	?	N	[?]	1	[]	0	[]	0
4	I have been to a Asian restaurant in New York ...	N	[I have been to a Asian restaurant in New York...	4	[i, have, been, to, a, asian, restaurant, in, ...	45	[asian, restaurant, new, york, city, menu, wri...	23
...	...	...	...	...	...	...	...	...
41	Mikes Pizza High Point NY Service was very slo...	P	[Mikes Pizza High Point NY Service was very sl...	4	[mikes, pizza, high, point, ny, service, was, ...	43	[mikes, pizza, high, point, ny, service, slow,...	26
42	After I went shopping with some of my friend w...	P	[After I went shopping with some of my friend ...	2	[after, i, went, shopping, with, some, of, my,...	24	[went, shopping, friend, went, dodo, restauran...	11
43	I entered the restaurant and a waitress came b...	P	[I entered the restaurant and a waitress came ...	5	[i, entered, the, restaurant, and, a, waitress...	99	[entered, restaurant, waitress, came, blanking...	49
44	Carlos Plate Shack was the worst dining experi...	P	[Carlos Plate Shack was the worst dining exper...	9	[carlos, plate, shack, was, the, worst, dining...	155	[carlos, plate, shack, worst, dining, experien...	88
45	Olive Oil Garden was very disappointing. I exp...	P	[Olive Oil Garden was very disappointing., I e...	5	[olive, oil, garden, was, very, disappointing,...	43	[olive, oil, garden, disappointing, expect, go...	23

	0	PoN	sentences	num_sentences	tokens	num_tokens	no_sw	num_no_sw	topwords_unfil	topwords_fil	freq_dist	freq_dist_unfil
0	?	N	[?]	1	[]	0	[]	0	[]	[]	{}	{}
1	Twin Trees Cicero NY HUGE salad bar and high q...	N	[Twin Trees Cicero NY HUGE salad bar and high ...	4	[twin, trees, cicero, ny, huge, salad, bar, an...	53	[twin, trees, cicero, ny, huge, salad, bar, hi...	32	[(and, 3), (to, 3), (are, 2), (the, 2), (twin,...	[(twin, 1), (trees, 1), (cicero, 1), (ny, 1), ...	{'twin': 1, 'trees': 1, 'cicero': 1, 'ny': 1, ...	{'twin': 1, 'trees': 1, 'cicero': 1, 'ny': 1, ...
2	The worst restaurant that I have ever eaten in...	N	[The worst restaurant that I have ever eaten i...	5	[the, worst, restaurant, that, i, have, ever, ...	105	[worst, restaurant, ever, eaten, undoubtedly, ...	49	[(the, 6), (i, 6), (a, 5), (was, 5), (and, 4),...	[(pepper, 3), (veggie, 2), (sandwich, 2), (red...	{'worst': 1, 'restaurant': 1, 'ever': 1, 'eate...	{'the': 6, 'worst': 1, 'restaurant': 1, 'that'...
3	?	N	[?]	1	[]	0	[]	0	[]	[]	{}	{}
4	I have been to a Asian restaurant in New York ...	N	[I have been to a Asian restaurant in New York...	4	[i, have, been, to, a, asian, restaurant, in, ...	45	[asian, restaurant, new, york, city, menu, wri...	23	[(i, 3), (a, 3), (the, 2), (is, 2), (by, 2), (...	[(asian, 1), (restaurant, 1), (new, 1), (york,...	{'asian': 1, 'restaurant': 1, 'new': 1, 'york'...	{'i': 3, 'have': 1, 'been': 1, 'to': 1, 'a': 3...
...	...	...	...	...	...	...	...	...	...	...	...	...
41	Mikes Pizza High Point NY Service was very slo...	P	[Mikes Pizza High Point NY Service was very sl...	4	[mikes, pizza, high, point, ny, service, was, ...	43	[mikes, pizza, high, point, ny, service, slow,...	26	[(pizza, 2), (was, 2), (you, 2), (would, 2), (...	[(pizza, 2), (would, 2), (mikes, 1), (high, 1)...	{'mikes': 1, 'pizza': 2, 'high': 1, 'point': 1...	{'mikes': 1, 'pizza': 2, 'high': 1, 'point': 1...
42	After I went shopping with some of my friend w...	P	[After I went shopping with some of my friend ...	2	[after, i, went, shopping, with, some, of, my,...	24	[went, shopping, friend, went, dodo, restauran...	11	[(i, 2), (went, 2), (of, 2), (after, 1), (shop...	[(went, 2), (shopping, 1), (friend, 1), (dodo,...	{'went': 2, 'shopping': 1, 'friend': 1, 'dodo'...	{'after': 1, 'i': 2, 'went': 2, 'shopping': 1,...
43	I entered the restaurant and a waitress came b...	P	[I entered the restaurant and a waitress came ...	5	[i, entered, the, restaurant, and, a, waitress...	99	[entered, restaurant, waitress, came, blanking...	49	[(the, 9), (i, 6), (and, 6), (to, 4), (a, 2), ...	[(waitress, 2), (waited, 2), (even, 2), (food,...	{'entered': 1, 'restaurant': 1, 'waitress': 2,...	{'i': 6, 'entered': 1, 'the': 9, 'restaurant':...
44	Carlos Plate Shack was the worst dining experi...	P	[Carlos Plate Shack was the worst dining exper...	9	[carlos, plate, shack, was, the, worst, dining...	155	[carlos, plate, shack, worst, dining, experien...	88	[(the, 9), (to, 7), (plate, 6), (and, 5), (my,...	[(plate, 6), (southern, 3), (comfort, 3), (ext...	{'carlos': 1, 'plate': 6, 'shack': 1, 'worst':...	{'carlos': 1, 'plate': 6, 'shack': 1, 'was': 3...
45	Olive Oil Garden was very disappointing. I exp...	P	[Olive Oil Garden was very disappointing., I e...	5	[olive, oil, garden, was, very, disappointing,...	43	[olive, oil, garden, disappointing, expect, go...	23	[(the, 3), (olive, 2), (oil, 2), (garden, 2), ...	[(olive, 2), (oil, 2), (garden, 2), (good, 2),...	{'olive': 2, 'oil': 2, 'garden': 2, 'disappoin...	{'olive': 2, 'oil': 2, 'garden': 2, 'was': 2, ...

HW2: VECTORIZATION (Pandas style!)¶

STEP 1: Import ALL the things¶

Import libraries¶

Import data from files¶

STEP 2: Prep Data¶

STEP 2a: Turn that fresh text into a pandas DF¶

STEP 2b: Label it¶

STEP 2c: Combine the dfs¶

STEP 3: TOKENIZE (and clean)!!¶

STEP 4: Remove Stopwords¶

STEP 5: Create a Frequency Distribution¶

STEP 6: Try Different Sentiment Analysis Tools¶

VADER¶

DIY SUMMARY¶

Doing VADER on the Summary Section¶

Doing VADER on the Most Frequent Words¶

STEP 7: Test Step 6 with Machine Learning!!¶

Naive Bayes¶

TEST 1: Vader Scores (Original)¶

TEST 2: Vader Scores (from Summary)¶

TEST 3: Vader Scores (original) AND Vader Scores (summary)¶

TEST 4: Vader Scores (50 most frequent -- filtered -- words)¶

TEST 5: All compound Vader Scores¶

TEST 6: ALL THE NUMBERS!!¶

TEST 7: Test UNFILTERED most frequent words¶

STEP 8: Test NLTK: Naive Bayes from HW1¶

STEP 7: Test `Step 6` with Machine Learning!!¶

TEST 5: All `compound` Vader Scores¶